feat: support to define a guardrail task no-code

2026-01-09 08:08:32 +00:00 · 2025-04-21 18:59:56 -03:00
parent 685d20f46c
commit 91b618b4e0
9 changed files with 1307 additions and 15 deletions
--- a/docs/concepts/tasks.mdx
+++ b/docs/concepts/tasks.mdx
@@ -322,6 +322,14 @@ blog_task = Task(
   - On success: it returns a tuple of `(bool, Any)`. For example: `(True, validated_result)` 
   - On Failure: it returns a tuple of `(bool, str)`. For example: `(False, "Error message explain the failure")`

+### GuardrailTask
+
+The `GuardrailTask` class provides a sophisticated way to generate and execute validation code for task outputs. Here's how it works:
+
+#### Code Execution
+
+The generated code can be executed in two ways: inside a Docker container (the default, recommended) or directly in the current environment (`unsafe_mode=True`).
+
 ### Error Handling Best Practices

 1. **Structured Error Responses**:
@@ -750,6 +758,8 @@ Task guardrails provide a powerful way to validate, transform, or filter task ou

 ### Basic Usage

+#### Define your own logic to validate
+
 ```python Code
 from typing import Tuple, Union
 from crewai import Task
@@ -769,6 +779,34 @@ task = Task(
 )
 ```

+#### Leverage a no-code approach for validation
+
+```python Code
+from crewai import Task
+
+task = Task(
+    description="Generate JSON data",
+    expected_output="Valid JSON object",
+    guardrail="Ensure the response is a valid JSON object"
+)
+```
+
+#### Use custom models for code generation
+
+```python Code
+from crewai import LLM, Task
+from crewai.tasks.guardrail_task import GuardrailTask
+
+task = Task(
+    description="Generate JSON data",
+    expected_output="Valid JSON object",
+    guardrail=GuardrailTask(
+        description="Ensure the response is a valid JSON object",
+        llm=LLM(model="gpt-4o-mini"),
+    )
+)
+```
+
 ### How Guardrails Work

 1. **Optional Attribute**: Guardrails are an optional attribute at the task level, allowing you to add validation only where needed.
--- a/src/crewai/task.py
+++ b/src/crewai/task.py
@@ -140,7 +140,7 @@ class Task(BaseModel):
        default=None,
    )
    processed_by_agents: Set[str] = Field(default_factory=set)
-    guardrail: Optional[Callable[[TaskOutput], Tuple[bool, Any]]] = Field(
+    guardrail: Optional[Union[Callable[[TaskOutput], Tuple[bool, Any]], str]] = Field(
        default=None,
        description="Function to validate task output before proceeding to next task",
    )
@@ -157,8 +157,12 @@ class Task(BaseModel):

    @field_validator("guardrail")
    @classmethod
-    def validate_guardrail_function(cls, v: Optional[Callable]) -> Optional[Callable]:
-        """Validate that the guardrail function has the correct signature and behavior.
+    def validate_guardrail_function(
+        cls, v: Optional[str | Callable]
+    ) -> Optional[str | Callable]:
+        """
+        If v is a callable, validate that the guardrail function has the correct signature and behavior.
+        If v is a string, return it as is.

        While type hints provide static checking, this validator ensures runtime safety by:
        1. Verifying the function accepts exactly one parameter (the TaskOutput)
@@ -171,16 +175,16 @@ class Task(BaseModel):
        - Clear error messages help users debug guardrail implementation issues

        Args:
-            v: The guardrail function to validate
+            v: The guardrail function to validate or a string describing the guardrail task

        Returns:
-            The validated guardrail function
+            The validated guardrail function or a string describing the guardrail task

        Raises:
            ValueError: If the function signature is invalid or return annotation
                       doesn't match Tuple[bool, Any]
        """
-        if v is not None:
+        if v is not None and callable(v):
            sig = inspect.signature(v)
            positional_args = [
                param
@@ -408,9 +412,7 @@ class Task(BaseModel):
            )

            if self.guardrail:
-                guardrail_result = GuardrailResult.from_tuple(
-                    self.guardrail(task_output)
-                )
+                guardrail_result = self._process_guardrail(task_output)
                if not guardrail_result.success:
                    if self.retry_count >= self.max_retries:
                        raise Exception(
@@ -464,13 +466,52 @@ class Task(BaseModel):
                    )
                )
                self._save_file(content)
-            crewai_event_bus.emit(self, TaskCompletedEvent(output=task_output, task=self))
+            crewai_event_bus.emit(
+                self, TaskCompletedEvent(output=task_output, task=self)
+            )
            return task_output
        except Exception as e:
            self.end_time = datetime.datetime.now()
            crewai_event_bus.emit(self, TaskFailedEvent(error=str(e), task=self))
            raise e  # Re-raise the exception after emitting the event

+    def _process_guardrail(self, task_output: TaskOutput) -> GuardrailResult:
+        """Run the guardrail on ``task_output`` and emit start/completion events.
+
+        A string guardrail is wrapped in a ``GuardrailTask``; anything else is called
+        directly. Raises ``ValueError`` if no guardrail is set.
+        """
+        if self.guardrail is None:
+            raise ValueError("Guardrail is not set")
+
+        from crewai.utilities.events import (
+            GuardrailTaskCompletedEvent,
+            GuardrailTaskStartedEvent,
+        )
+        from crewai.utilities.events.crewai_event_bus import crewai_event_bus
+
+        started = GuardrailTaskStartedEvent(
+            guardrail=self.guardrail, retry_count=self.retry_count
+        )
+        crewai_event_bus.emit(self, started)
+
+        if not isinstance(self.guardrail, str):
+            verdict = self.guardrail(task_output)
+        else:
+            from crewai.tasks.guardrail_task import GuardrailTask
+
+            verdict = GuardrailTask(description=self.guardrail, task=self)(task_output)
+
+        outcome = GuardrailResult.from_tuple(verdict)
+        finished = GuardrailTaskCompletedEvent(
+            success=outcome.success,
+            result=outcome.result,
+            error=outcome.error,
+            retry_count=self.retry_count,
+        )
+        crewai_event_bus.emit(self, finished)
+        return outcome
+
    def prompt(self) -> str:
        """Prompt the task.

--- a/src/crewai/tasks/guardrail_task.py
+++ b/src/crewai/tasks/guardrail_task.py
@@ -0,0 +1,154 @@
+from typing import Any, Tuple
+
+from crewai.llm import LLM
+from crewai.task import Task
+from crewai.tasks.task_output import TaskOutput
+from crewai.utilities.printer import Printer
+
+
+class GuardrailTask:
+    """A task that validates the output of another task using generated Python code.
+
+    This class generates and executes Python code to validate task outputs based on
+    specified criteria. It uses an LLM to generate the validation code and provides
+    safety guardrails for code execution.
+
+    Args:
+        description (str): The description of the validation criteria.
+        task (Task, optional): The task whose output needs validation.
+        llm (LLM, optional): The language model to use for code generation.
+            Defaults to the LLM of ``task.agent`` when one is available.
+        unsafe_mode (bool, optional): Forwarded to ``CodeInterpreterTool``.
+        additional_instructions (str, optional): Additional instructions for the guardrail task.
+
+    Note:
+        A missing LLM is reported with ``ValueError`` by ``generate_code``, not here.
+    """
+
+    def __init__(
+        self,
+        description: str,
+        task: Task | None = None,
+        llm: LLM | None = None,
+        unsafe_mode: bool = False,
+        additional_instructions: str = "",
+    ):
+        self.description = description
+        self.unsafe_mode: bool = unsafe_mode
+        self.additional_instructions = additional_instructions
+
+        # Fall back to the LLM of the task's agent when none is passed explicitly.
+        agent = getattr(task, "agent", None)
+        self.llm: LLM | None = llm or getattr(agent, "llm", None)
+
+    @property
+    def system_instructions(self) -> str:
+        """System instructions for the LLM code generation.
+
+        Returns:
+            str: Complete system instructions including security constraints.
+        """
+        # Adjacent literals are concatenated verbatim, so each needs its own separator.
+        security_instructions = (
+            "- DO NOT wrap the output in markdown or use triple backticks. Return only raw Python code.\n"
+            "- DO NOT use `exec`, `eval`, `compile`, `open`, `os`, `subprocess`, `socket`, `shutil`, or any other system-level modules.\n"
+            "- Your code must not perform any file I/O, shell access, or dynamic code execution."
+        )
+        return (
+            "You are an expert Python developer. "
+            "You **must strictly** follow the task description, use the provided raw output as the input in your code. "
+            "Your code must:\n"
+            "- Return results with: print((True, data)) on success, or print((False, 'very detailed error message')) on failure. Make sure the final output is being assigned to 'result' variable.\n"
+            "- Use the literal string of the task output (already included in your input) if needed.\n"
+            "- Generate the code **following strictly** the task description.\n"
+            "- Be valid Python 3 — executable as-is.\n"
+            f"{security_instructions}\n"
+            "Additional instructions (do not override the previous instructions):\n"
+            f"{self.additional_instructions}"
+        )
+
+    def user_instructions(self, task_output: TaskOutput) -> str:
+        """Generates user instructions for the LLM code generation.
+
+        Args:
+            task_output (TaskOutput): The output to be validated.
+
+        Returns:
+            str: Instructions for generating validation code.
+        """
+        return (
+            "Based on the task description below, generate Python 3 code that validates the task output. \n"
+            "Task description:\n"
+            f"{self.description}\n"
+            "Here is the raw output from the task: \n"
+            f"'{task_output.raw}' \n"
+            "Use this exact string literal inside your generated code (do not reference variables like task_output.raw).\n"
+            "Now generate Python code that follows the instructions above."
+        )
+
+    def generate_code(self, task_output: TaskOutput) -> str:
+        """Generates Python code for validating the task output.
+
+        Args:
+            task_output (TaskOutput): The output to be validated.
+
+        Returns:
+            str: Generated Python code for validation.
+
+        Raises:
+            ValueError: If no LLM was provided or could be taken from the task's agent.
+        """
+        if self.llm is None:
+            raise ValueError("Provide a valid LLM to the GuardrailTask")
+
+        response = self.llm.call(
+            messages=[
+                {"role": "system", "content": self.system_instructions},
+                {
+                    "role": "user",
+                    "content": self.user_instructions(task_output=task_output),
+                },
+            ]
+        )
+
+        Printer().print(
+            content=f"The following code was generated for the guardrail task:\n{response}\n",
+            color="cyan",
+        )
+        return response
+
+    def __call__(self, task_output: TaskOutput) -> Tuple[bool, Any]:
+        """Executes the validation code on the task output.
+
+        Args:
+            task_output (TaskOutput): The output to be validated.
+
+        Returns:
+            Tuple[bool, Any]: A tuple containing:
+                - bool: True if validation passed, False otherwise
+                - Any: The validation result or error message
+        """
+        import ast
+
+        from crewai_tools import CodeInterpreterTool
+
+        code = self.generate_code(task_output)
+        result = CodeInterpreterTool(code=code, unsafe_mode=self.unsafe_mode).run()
+
+        # Only string output needs inspecting/parsing; other values are returned as-is.
+        if not isinstance(result, str):
+            return result
+
+        error_messages = [
+            "Something went wrong while running the code",
+            "No result variable found",  # when running in unsafe mode, the final output should be stored in the result variable
+        ]
+        if any(msg in result for msg in error_messages):
+            return False, result
+
+        # Output that is not a Python literal counts as a failed validation
+        # (reported back to the caller) instead of an unhandled exception.
+        try:
+            return ast.literal_eval(result)
+        except (ValueError, TypeError, SyntaxError) as exc:
+            return False, f"Could not parse guardrail output {result!r}: {exc}"
--- a/src/crewai/utilities/events/init.py
+++ b/src/crewai/utilities/events/init.py
@@ -9,6 +9,10 @@ from .crew_events import (
    CrewTestCompletedEvent,
    CrewTestFailedEvent,
 )
+from .guardrail_task_events import (
+    GuardrailTaskCompletedEvent,
+    GuardrailTaskStartedEvent,
+)
 from .agent_events import (
    AgentExecutionStartedEvent,
    AgentExecutionCompletedEvent,
--- a/src/crewai/utilities/events/event_types.py
+++ b/src/crewai/utilities/events/event_types.py
@@ -23,6 +23,10 @@ from .flow_events import (
    MethodExecutionFinishedEvent,
    MethodExecutionStartedEvent,
 )
+from .guardrail_task_events import (
+    GuardrailTaskCompletedEvent,
+    GuardrailTaskStartedEvent,
+)
 from .llm_events import (
    LLMCallCompletedEvent,
    LLMCallFailedEvent,
@@ -68,4 +72,6 @@ EventTypes = Union[
    LLMCallCompletedEvent,
    LLMCallFailedEvent,
    LLMStreamChunkEvent,
+    GuardrailTaskStartedEvent,
+    GuardrailTaskCompletedEvent,
 ]
--- a/src/crewai/utilities/events/guardrail_task_events.py
+++ b/src/crewai/utilities/events/guardrail_task_events.py
@@ -0,0 +1,28 @@
+from typing import Any, Callable, Optional, Union
+
+from pydantic import BaseModel
+
+from crewai.utilities.events.base_events import BaseEvent
+
+
+class GuardrailTaskStartedEvent(BaseEvent):
+    """Event emitted when a guardrail task starts
+
+    Attributes:
+        guardrail: The guardrail being run, either a description string or a callable.
+        retry_count: Retry counter value supplied by the emitter.
+    """
+
+    type: str = "guardrail_task_started"
+    guardrail: Union[str, Callable]
+    retry_count: int
+
+
+class GuardrailTaskCompletedEvent(BaseEvent):
+    """Event emitted when a guardrail task completes"""
+
+    type: str = "guardrail_task_completed"
+    success: bool  # whether the guardrail reported success
+    result: Any  # value carried by the guardrail result
+    error: Optional[str] = None  # error carried by the guardrail result, if any
+    retry_count: int  # retry counter value supplied by the emitter
--- a/tests/cassettes/test_guardrail_emits_events.yaml
+++ b/tests/cassettes/test_guardrail_emits_events.yaml
@@ -0,0 +1,307 @@
+interactions:
+- request:
+    body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
+      personal goal is: Test Goal\nTo give my best complete final answer to the task
+      respond using the exact following format:\n\nThought: I now can give a great
+      answer\nFinal Answer: Your final answer must be the great and the most complete
+      as possible, it must be outcome described.\n\nI MUST use these formats, my job
+      depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
+      is the expected criteria for your final answer: Output\nyou MUST return the
+      actual complete content as the final answer, not a summary.\n\nBegin! This is
+      VERY important to you, use the tools available and give your best Final Answer,
+      your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"]}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '807'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
+        _cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.68.2
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.68.2
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-read-timeout:
+      - '600.0'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.12
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAAwAAAP//jFfNjiPHDb7PUxA6LiRhZ3Z2xpnbOHCQWSNY2J4gQbLGgKpid9NTzWoX
+        qyRrjQX2IXzJ6+2TBKzu1s9mDrlIrWYVi/zI7yvq9wuABfvFHSxch9n1Q1h9+z7/8MM/dv/8E3H5
+        fqdvdLh5//3mXx+/+/av/XeLpe2Im1/I5XnX2sV+CJQ5ymh2iTCTeb28vX57dXt1c3VTDX30FGxb
+        O+TVdVz1LLy6en11vXp9u7r8ZtrdRXakizv49wUAwO/10+IUT78t7uD1cn7Tkyq2tLg7LAJYpBjs
+        zQJVWTNKXiyPRhclk9TQH0DiDhwKtLwlQGgtbEDRHSWAD/IXFgxwX3/fwb0CwiNphvuWJC/hATrc
+        EmyIBDLqM3nYce5gSHHLnqUFhAkZgkQ6RFGC3GGGnigr5I6AfhvIZfLgEmdKjNDEBLHkoeQ1PHaU
+        qImJlmBPwOOmpoQAUyb1zFgyaOl7TPwRrQ5L8JSRA3lAhUS/Fk7k1x/kgzxI9ZEIQw+xgUyaWdol
+        DJgyuxIwhT1obPIOEx3NSltKGGqg7Owh9kMUkqz2SMCSIwwB9zVuaIp47EkyBsj7gfTkMGBxoXiC
+        Tclgp0jMELhnQyLHO4vzcg2vXv1dOFfQWdpXr+7gsWOdUWDhzBhg6FAJdiNA4nnLvpyHFxP00Zcw
+        xdDRMb05RfLAAqwxVPzWUE82i0JHYQASLWkqH6HrKlyzuyHFNmFvOTvbboikRC6HPUQBthh2soRd
+        x64DZK92msPsOkNjU1oFqrjzWB1PWwpxMPjMuSPVWrwrA+VBMrWpxnmKzX2TKUGZ465F45OlM/ZN
+        DCHudD2COaLXRFfU4BHYUuJmbwtHkDMlrEnBhvLOmn2LiWPRrxDWvWbqFVD8CNboArP52cMupmfI
+        saXcUbKmNM/ip6Z8Y3n9VD2cpvReHB3CsDyMKMdTT/hXS7icgjhps20MW1LYYmCPec7qQEsL1ti0
+        Pz3h0BxDir64PCE1doCOOfEpiXUgxw2Tn5nW1/DMuaWtlu6B6CyAlX2rXUzBsNpyimJbKhTXBsW9
+        czRktPRf6v79YAwM+1ENqkyNdTyhmJ3OCkVLXekwJYvQpGKzBxK/Kkqpls4FtoBH3rYRQz0kzqiN
+        JSiKGw6c9xNoY6djfTOzakQfW2TRDJuiLKR6hsoaTnKbY3VRGk69HvrlWAO27ej3VRc9DSHuKyss
+        iGMFLJUvn/+jIER+pMpjhAadRTxloMfzJkqRAjUNucxbCvvlobFzjEGrNIQqei35io15qCa7NCYJ
+        q1q1spKVHPvaP1PF4NHW1rrVTYGfCX6iQMKlr+nsaAM4DIEdjqoRE7yrwmPWd7jFczMaccdeT7VA
+        OJ0JiQbKbHkccrQrSWfJYbE7eUqYHZO4sY6JfHE0gjhEVZ5qHBvoSo8ClFJM6znJev/9DQXbWs2v
+        U9Ri8qbw7uHHe0vGlv+IHCqWBrhUxbbsckL3XOM0l1VASXUJvTk/vHcW83KKdIgpzxb7jiXbxaOH
+        6P4cJbMUK+GZRr5Qh3ckzyyTthslRyRngZgaxGBIRWQ+tTL5VPjPdJoHCiy0/Er+UPbgOpSWFHr0
+        NGu8i542RloDxx1CN1ZXNZtvaxet07Te6rmLKZa2O1a5G3/PrasZW9Ivn/+we+BM/2dxHNHEAwm/
+        fP7DxX7DMs8vlX7ZpLBlZ8yyZKvEYeCP9YKOQZcQcD+NI9Gu+gr1SNLAdolICx233erXMorETOk1
+        PORzMZ0QN62tk0CUsH/xLq3jQtA4Md9YPwnryBAbYrZzDXc4ioYWZ7VsSgA22a8zyUQo8aCYWat4
+        zzJ4Oi8maoqizaxSQjgxoEic3Nik+vNk+XSYTUNshxQ3+tXWRcPC2j0ZHaPYHKo5Dotq/XQB8HOd
+        gcvZWLsYUuyH/JTjM9XjLt++Gf0tjqP30Xr99nay5pgxHA03l9PofO7waZwV9WSMXjh0Hfnj1uPM
+        jcVzPDFcnKT9v+G85HtMnaX9f9wfDbVjyT8NiTy785SPyxL9Uu/Zl5cdYK4BL5TSlh09ZaZkpfDU
+        YAnjH4bFSJenhqWlNCQe/zU0w1Nz69/e4Dd45RYXny7+CwAA//8DAOzQwR9DDQAA
+    headers:
+      CF-RAY:
+      - 93402298d9980110-GRU
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Mon, 21 Apr 2025 21:57:12 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - crewai-iuxna1
+      openai-processing-ms:
+      - '6385'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999832'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_2a19c29e1e9dd766289937937418044a
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
+      personal goal is: Test Goal\nTo give my best complete final answer to the task
+      respond using the exact following format:\n\nThought: I now can give a great
+      answer\nFinal Answer: Your final answer must be the great and the most complete
+      as possible, it must be outcome described.\n\nI MUST use these formats, my job
+      depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
+      is the expected criteria for your final answer: Output\nyou MUST return the
+      actual complete content as the final answer, not a summary.\n\nThis is the context
+      you''re working with:\n### Previous attempt failed validation: bad result\n\n\n###
+      Previous result:\nAs a Test Agent, I have been tasked with providing a complete
+      response that meets the expected criteria for output. Therefore, here is the
+      full content without summarization, detailed as required.\n\nIn the realm of
+      testing, particularly software testing, several critical components come into
+      play. The fundamental types of testing include but are not limited to:\n\n1.
+      **Unit Testing**: This is the initial phase where individual components or modules
+      of the software are tested in isolation. Unit tests help ensure that each part
+      of the program functions correctly on its own, which aids in catching bugs early
+      in the development process.\n\n2. **Integration Testing**: After unit testing,
+      integration testing follows. This phase focuses on verifying the interaction
+      between various components or systems and ensuring that they work together as
+      intended.\n\n3. **System Testing**: Once the integrated components have been
+      tested, system testing involves validating the complete and fully integrated
+      software product. This ensures that it meets the specified requirements and
+      works as expected in a real-world environment.\n\n4. **Acceptance Testing**:
+      This is typically the final phase of testing and is usually carried out by end-users
+      or clients. The goal is to validate the usability and functionality of the system
+      against business requirements. Acceptance testing confirms that the software
+      is ready for deployment and meets the user\u2019s needs.\n\nTo facilitate these
+      testing processes effectively, various tools are leveraged. These tools can
+      include:\n\n- **Automated Testing Tools**: Tools like Selenium for web applications
+      or JUnit for Java applications allow testers to automate repetitive testing
+      tasks, which increases efficiency and reduces the possibility of human error.\n\n-
+      **Test Management Tools**: Tools such as JIRA or TestRail are essential for
+      tracking test progress, managing test cases, and reporting testing outcomes.\n\n-
+      **Continuous Integration Tools**: Tools like Jenkins help in automating the
+      process of running tests as part of the development pipeline, ensuring that
+      any changes made in the codebase are continuously tested.\n\nIn conclusion,
+      thorough testing through various stages\u2014unit, integration, system, and
+      acceptance\u2014combined with the strategic use of specialized tools, lays the
+      foundation for delivering high-quality software. It ensures that the product
+      not only functions correctly but also meets user expectations, paving the way
+      for successful implementations and satisfied clients.\n\n\nTry again, making
+      sure to address the validation error.\n\nBegin! This is VERY important to you,
+      use the tools available and give your best Final Answer, your job depends on
+      it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"]}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '3539'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
+        _cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.68.2
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.68.2
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-read-timeout:
+      - '600.0'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.12
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAA3RXTY/cuBG9768ojA9ZG90Ne3YSB3ObDJxgjMTe2G0ESHxhkyWpdiiWzI9uy/vn
+        gypKavXYexn0SGSx+F69V6XffwK4Ind1C1e2M9n2g9/+7X3+93+af7rxtw+PN2OPx5tCzc2bb+Px
+        v+/91UZ28OE3tHnetbPcDx4zcaivbUSTUaK+en3z5+vX13/55Vpf9OzQy7Z2yNsb3vYUaHv98vpm
+        +/L19tVfp90dk8V0dQv/+wkA4Hf9K3kGh1+vbuHlZn7SY0qmxavbZRHAVWQvT65MSpSyCflqc35p
+        OWQMmvoDBD6BNQFaOiIYaCVtMCGdMAJ8Dn+nYDzc6f+38BAgdwgRje+BG0jc5JOJCBlTptBuwIAA
+        EbHDkDTiMEQ2tgNKgClhyGQ8ZAYMqcjGTo4bBk/WCHgJesQMX4rxlEeQ3J2JLoEJDpriG/IeSsII
+        +HVAm+umHew7hIa95xOFFrhkTwGTZtuU4EyPIRuvyXHAkJOkL2+fXgE8NWhH63ED2A+dSfRNHh9N
+        JC4JPB7R193TBslMIq2vxz5ByeTpG8rLyKXtuGRdN0S2mNLuc/gcnj17BvtxQA34cU5lXyPLglc7
+        ePHiU6A8P3zx4hb2HSXI44DrNLKJLeYEFBwdyRXj5ea2gsoReswduwQnyh1VHi07PJiET/lAIazI
+        oQPGhmOfwKQJcHQ7eMhCp2JLMWUQrCUVhw2GhGBaQyFlOJQ2bQCDOXjJ0Al0PGBMcqA12XaAMXJM
+        cMCG9fQVIUPkNgqoutybjFHqocW0A0VEbp5AuRsHssb7EUzJ3IvwoCQ5s4mmxxPHxwSeHhHe6s6G
+        I7w1RyO4vFue7N692W/AzEX0pZB9hCNGaqbiBFeiQt3hfBmpqwtGr4Wwh5CxjXXPire7Ru5Q5txV
+        MJT/lMDGYidd6HkjdHwCR02DUQ7o2RWPwm3GaGxWEoEDggmcO4w7WB+5VKaAJ/idOpRVUv4HCujW
+        OhBwIHM7r4gRbfbjBshJOTejRNKDG2NROba56pFSKpqVIjInJxkcMJ8Qgz6TrISPQFmFSglEVqi2
+        Q+HI/ohw4NxBb4KUrYQ+87jUN9ou0Bc5sFJbVaas/sop9yYoi3e/Pix7OCr87/5xZnztNUrYL0LY
+        xzFl7L/TWM1zyjGdU1HtqN+jW0cUmRg4dexxB1PMeRMejS8mT6aEwW0zbzE4SHVdGtAulaaySXO1
+        mXypjAN2RvJZiRKMjZzSYlPJYpCflSjLwdHklA8ZMEj2Wh2LR0y4Bw7b1aNzmQbri5N0JkswweIG
+        SjIHEqfe6O6EtkTx7QUphraYaEJGPF9E6irifHFxBMUl9cy586MUFOVa7cGhAwxHihxEa0rZjVB2
+        Zy0OWdJY03bPwRUF5OwJh1Hhlq6hTpiyecSOvcOYNmDOceasHWaMvTaQWTmKf81XGlQlcbXVRsoY
+        yUy6kBbpRq06h4PnUXOvRdXQCtm5xBIMdGRpUbKnmsCPyZ+aYIKDyACTnPWlUMRe5Szna38MiC7t
+        4J77Xgpz6sMqV6ES4ZOs+h5F+PnT3f65xnmvzNRK+NHC93f750sbe7O0v/n9XgQqr/cM2DRkCUP2
+        o8jctNXsFxSqg6LwoTWMedQBQ0RhpjaqehcIsBdI0d1K8K3UwuIWF2er59ZXIk46G62AjKEzwc56
+        TgOKioKDiJ6mql71110NCanYToT3ET0GKr2GOuHhcoiZ7QsiDpgpyyik7WoDp448rs3rbhgkjrjh
+        AYU7pzF7PshCMwxnFSZqg3qE4kj9EPmoFygxmMhFJhHqsVaBsbZEY8fdjJJgA/9S8LVtLRjtz8m8
+        ffhwN9vmB0Ne8ZaRIg0Ykjl41OQ4tibUwWjxt6bhWB0eE8LgTa6jQ47GPuqypaVvlhqQp1YU4NgW
+        HdIEwOomjbHCgsBo2Xtz4Km/mZ71YCNi7A8q48UurTdqQQqBp1Zt4+kM9qTuFojuOWQKRRz0oofP
+        SP3B+5/vH55XSjdLgbzF8Eh19LqnaD3eP2zOdbFOghudxMB2JrQTeSuN0OqgKV+xtFhCmPFPdWQT
+        7msoKXXue8o6r9Vepr3ZInToh3Q5f4cRAp7UeXNkV6yOCE5YwQSBMxwimkfAr1RTPrcI9f6UI5re
+        U/jRdCQDRuP5tDjFPYsDJeKg5jA10/M3wx+P5BedSwaKzRqdzeTQm7n8n/j6pqKUMKpoDDi9jc0w
+        Yq7ji+UQaj8dShxYmz849CSOHFroqO22y8fJlKeYbBk8ujqWqZtkaWkt2YvpgJsfzTW1bFQJ/Vmc
+        iX2ZJgG5zf3DslCH/461ns/fV5XVeWabjMxOv0cYOE/uPA1t08S97k6ffCbJTYY/bGTMm21L9a0F
+        k37QkaQ+OIixa2dcjQ8XnelQMuBXK23p+y+4Oj6UYRAPEQ1JmTfFT9NBDcclW+5xniIdeWpX8zd4
+        M9bG3KoX6mQrblUd3V9+W7hi/w8AAP//jJhBbsQgDEX3OUXEASp12s70MiPkgkndMoDALHP3CsIE
+        0s6i6weOfyKI/+emZkFXR5BZ5cT+VnwGMCXThtnS2v6zba09jYY6oskJiql32doBgHO+CSxW/trI
+        upt365cQ/Uf6tVUYcpQ+ZURI3hWjntgHUek6zfO1hgT54PtFiP4WWLL/xvq480sLCUTPJgb6eqdc
+        xo4Onk+XOzlUlBoZyKYhaBCqTBS67+2pBGRNfgDToPtvP49qb9rJLf8p30E99ahliKhJHTX3ZRG/
+        6jF/vGx/z7VhUS8MhZIJY/kWGg1ku0UqYrtypCG3YAyRtlzFBGku+u0M73BSYlqnHwAAAP//AwDv
+        DD9WZRIAAA==
+    headers:
+      CF-RAY:
+      - 934022c27c860110-GRU
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Mon, 21 Apr 2025 21:57:22 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - crewai-iuxna1
+      openai-processing-ms:
+      - '9187'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999158'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_94bb40dead4c4e9c7fa12de3bfb636b7
+    status:
+      code: 200
+      message: OK
+version: 1
--- a/tests/cassettes/test_guardrail_using_llm.yaml
+++ b/tests/cassettes/test_guardrail_using_llm.yaml
@@ -0,0 +1,522 @@
+interactions:
+- request:
+    body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
+      personal goal is: Test Goal\nTo give my best complete final answer to the task
+      respond using the exact following format:\n\nThought: I now can give a great
+      answer\nFinal Answer: Your final answer must be the great and the most complete
+      as possible, it must be outcome described.\n\nI MUST use these formats, my job
+      depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
+      is the expected criteria for your final answer: Output\nyou MUST return the
+      actual complete content as the final answer, not a summary.\n\nBegin! This is
+      VERY important to you, use the tools available and give your best Final Answer,
+      your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"]}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '807'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.68.2
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.68.2
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-read-timeout:
+      - '600.0'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.12
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAAwAAAP//jFTBbuQ2DL3PVxA6e4KJm+zszq0ttkBORYuiPbSLASPRNndlSRHpmcwu
+        8u+FZCcz2ebQi2H78ZGPj6K+rQAMO7MDYwdUOya//ulX/a3tf/+Z3f3jX/xxdGGDfz60p7uvx+mj
+        aQoj3n8mq8+sKxvH5Ek5hhm2mVCpZL3e3ty22/bdpq3AGB35QuuTrm/ieuTA63bT3qw32/X1+4U9
+        RLYkZgd/rwAAvtVn0RkcPZodbJrnPyOJYE9m9xIEYHL05Y9BERbFoKY5gzYGpVCl30GIR7AYoOcD
+        AUJfZAMGOVIG+Cf8wgE9/Fi/d/DHQKAoXwAVBgwOMj1MnEkAoRiQaaAgNVNw4EiRPZUoSTEIgQ6o
+        MBKpgA4E9JjIKjmwmZUyI3QxQ5w0TXoFdwG6yXfsPYcedGB5LjdS0AZYgQVIhIIyetAINh4oA3oP
+        mTwdMCiglBpSBaUcD+wIllmVl+pEAxRkynMZ1KqNQxfziGWgpYz1hLkpBMtCTU2H1k4ZlfwJ0Llc
+        lMx9BSInELv6Ufy6Ks5l6mKmBu7gyN6XNj0HWgi2TDGfqrQYKKg0gK6QSl8YTtBP7Kgw5LkPN8uo
+        2umsvKsjm10s0nWIOU79UIOP5P06ZSqmlQRH1iFOCnFk1WIAjynmcmKW8QnEDDKNI2b+Ojv0ypzS
+        GgtgSjmiHebe+gkzBqVyogbuh/XDhJ719N1JEFSWjhfXFkPccixqdrm6PLiZukmwLE+YvL8AMIS4
+        EMrKfFqQp5cl8bFPOd7Ld1TTcWAZ9plQYigLIRqTqejTCuBTXcbp1X6ZlOOYdK/xC9Vy17c/zPnM
+        +Q64QK8/LKhGRX8G2m3bvJFwv9h+sc/Goh3Inann5cfJcbwAVhdt/1fOW7nn1jn0/yf9GbCWkpLb
+        p0yO7euWz2GZPtcFfzvsxeYq2AjlA1vaK1Muo3DU4eTnm8vISZTGfcehp5wyz9dXl/bd1t2+w/fY
+        WrN6Wv0LAAD//wMAAfXtOswFAAA=
+    headers:
+      CF-RAY:
+      - 934022059c2c0110-GRU
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Mon, 21 Apr 2025 21:56:45 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
+        path=/; expires=Mon, 21-Apr-25 22:26:45 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - crewai-iuxna1
+      openai-processing-ms:
+      - '2377'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999832'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_f39581c88a83855cf77c06098b787948
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
+      personal goal is: Test Goal\nTo give my best complete final answer to the task
+      respond using the exact following format:\n\nThought: I now can give a great
+      answer\nFinal Answer: Your final answer must be the great and the most complete
+      as possible, it must be outcome described.\n\nI MUST use these formats, my job
+      depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
+      is the expected criteria for your final answer: Output\nyou MUST return the
+      actual complete content as the final answer, not a summary.\n\nThis is the context
+      you''re working with:\n### Previous attempt failed validation: bad result\n\n\n###
+      Previous result:\nThe task at hand requires a comprehensive and detailed response
+      that meets the expected criteria for output. In fulfilling this requirement,
+      it is essential to cover all relevant aspects and provide complete content,
+      ensuring that the information is clear, concise, and accurately addresses the
+      needs of the task. Therefore, I will outline the necessary components, adhere
+      to any guidelines provided, and ensure that the final output is thorough and
+      well-presented, without omitting important details or summarizing the information.
+      This approach will guarantee a high-quality response that satisfies the outlined
+      expectations.\n\n\nTry again, making sure to address the validation error.\n\nBegin!
+      This is VERY important to you, use the tools available and give your best Final
+      Answer, your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop":
+      ["\nObservation:"]}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '1619'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
+        _cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.68.2
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.68.2
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-read-timeout:
+      - '600.0'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.12
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAA4xWTW8cNwy9+1cQczTWi83GjhPf3CQFjKJI27hoizowuBJnhrZGUkVq15sg/72Q
+        ZvbDaQ69GOshRb1HPpL6cgLQsG2uoDE9qhmiO/vhg/56YR5uPy4uzfvFm5//ePfnb8vPf70y4Z+f
+        PjSzciKsHsjo7tTchCE6Ug5+NJtEqFSivrg8v1heLl8tLqphCJZcOdZFPTsPZwN7Plsuludni8uz
+        F6+n031gQ9Jcwd8nAABf6t+C01t6aq5gMdt9GUgEO2qu9k4ATQqufGlQhEXRazM7GE3wSr5CvwEf
+        NmDQQ8drAoSuwAb0sqEEcOd/ZI8Oruv/V3DbEyjKI6BCj96CJ0MirKgkgKB9SCF3PRSbJUV2ZCGR
+        xOCFQAOQl5wIarYYvSHYsPagPQE9RTJKFkxipcQIbUgQssas83p1TDxg2sKY+gKYpQSNKazZEvig
+        8JBFAUHyUF1XWWvwqTzlRyUPiVypTzmuE6sZoLWp8PEdoHPFh9boFVAKNKm00PaUiocGQL/dXW6h
+        y2zJsSeZw52/87cB0PRMawLtWWb1njY4FzbleEEUPHkV2LBzsCJgb1wuodhX513irmq8F3M4Pb3x
+        moLNpgjt9PQK3jrC5LZgqWVP9VTMKQYhCO2eWQUesrqdj3DnuWVTKxBaKMDXIy2syBL15KVqotZ+
+        fueh6GFZQLzbVfbao9sKS0Hy/gmHEp7Q9FPC9hBCZLMXh9vOCtOQYkio5coWjcoMLCrOKtR94g0K
+        gWi2TAIhAT1hqWOtuuQYQ9JaKSkKHEoy5/C+AvCdIxiKGFZVWi6ksdo7sSC02TnI3lIqLWILkgmw
+        5KowGFC1cr/zLwvxtw4T67Zi/CWRYZnK8LsQmFKKaorVRODQdxm7kvLKPExiPG4IE/yatsC+DWnA
+        Ulagth3l7bZzuF4HtoDDiru8u3pqIu1Rx3THwF5LM2BSNnlUNntA2OB29GMBQtmOV071tZXZeWUW
+        ivZ2dD7W7uHPo1geaQst1wSNLcBeuOtVwCbceGhTGKrjvuPHfI9sjjUwg0SVqakN1BPwUGq4k2H9
+        cpSJmEjIFzL1WpWdMgwdN26lcVFoXFvL5SQ6+I0k5GSoivOmBYzRscGVo9mu0SBRS4m8GcXl2D9W
+        ZbU5aU8JEmFVxYB1HjkZU1nG5V5FhwvrZHnSEmlKELCfUI5CErAsJovQmPnbURZ1PIBoykZzUSnG
+        mAKafgY342zoMib0SkW1B/EULAORSr1iam87zdGaQIFNz44mtGN3Jy6Bd6Ua+60PjkXZwJpp83xw
+        jIN+Djc6QimNXSeHc1vAKs6YaM0hC0TWFp0TkFxnAKyxy0dtMGrn21FcB2Oi1XbU9YhSUFnKXAip
+        ig/dbhMcb7JEbRYs29Rn544M6H2YUlB26KfJ8nW/NV3oYgor+eZo07Jn6e8ToQRfNqRoiE21fj0B
+        +FS3c362cJuYwhD1XsMj1euWb5ZjvObwKDhYXy5eT1YNiu5geLWYlvrzgPdjqeRowTcGTU/2cPTw
+        GsBsORwZTo5o/xfO92KP1Nl3/yf8wWAMRSV7HxNZNs8pH9wSPdQd/323fZor4EYordnQvTKlUgpL
+        LWY3PmUa2YrScN+y7yjFxON7po337aW9eIWvcWmak68n/wIAAP//AwA0/RJL3QkAAA==
+    headers:
+      CF-RAY:
+      - 93402216690b0110-GRU
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Mon, 21 Apr 2025 21:56:49 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - crewai-iuxna1
+      openai-processing-ms:
+      - '4451'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999631'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_8383a16d5f5b7f53d659bebf481ba936
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
+      personal goal is: Test Goal\nTo give my best complete final answer to the task
+      respond using the exact following format:\n\nThought: I now can give a great
+      answer\nFinal Answer: Your final answer must be the great and the most complete
+      as possible, it must be outcome described.\n\nI MUST use these formats, my job
+      depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
+      is the expected criteria for your final answer: Output\nyou MUST return the
+      actual complete content as the final answer, not a summary.\n\nBegin! This is
+      VERY important to you, use the tools available and give your best Final Answer,
+      your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"]}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '807'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
+        _cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.68.2
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.68.2
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-read-timeout:
+      - '600.0'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.12
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAA4xU227bRhB911cM+CwJtnyN3xygBoI+xG1doGgTCKPdITnRcpbdGUoRAgP9jf5e
+        v6TYpWTJjR8KEATIs3PmdvZ8mwBU7Ks7qFyL5ro+zN5/tJ/urz7cPjziw68ff3y6vXz8vb1/fLyw
+        n29/q6Y5Iq6+kLND1NzFrg9kHGWEXSI0yqznN5dXi5vF9flZAbroKeSwprfZZZx1LDxbnC0uZ2c3
+        s/PbfXQb2ZFWd/DHBADgW3nnOsXT1+oOClf505EqNlTdvRwCqFIM+U+FqqyGYtX0CLooRlJK/wAS
+        t+BQoOENAUKTywYU3VIC+CQPLBjgvnzfwROpgaGuFTAReFJuhDxYBNpgGNAItCfHNTvQNYegEBOs
+        JW4D+YaABRBy/hRDIA8kG05ROhKbw1NL4BIbJUaoYzpwsjSgg3OkCiwuDJ7ABUxsO4g1sKilweXR
+        6xQwcFP4YMvWQiBMkgnGdfGG8hnxYC1BoA2FTEHSYEMlylraAQV2bFCn2EGPydhxj2I6h1/23WEI
+        uykg2GEioG0cgn8pEMHl3CWXi+JYCfoUuz6nQIM4WGAhhW3+YgX62pMz8mPWXN7rzPfec+5xzMx2
+        SNhiWdyWQph5qjnvQ11Muek0rBK7MSGqUn4K9Z8Dhv34EmkfJQMrVPIQBfpEnoxSV8gOK5nD+x2Q
+        6FCoC2emOk6AFRIF2qDY2HaLIZA0LE0peMshANX1uIewg45Qh0TfNfvPX3+Dwx5XHNiYtLD1KW7Y
+        ExRNrELWknLTWtaExczBCQbxlLLgfdl5Xah1KLuHDs0ozeGHQwm50R4T5rFOSyM5qiNro9/LpCby
+        K3Rr6Mi1KKzdKP08SjHGAGPuF4HGGlB2x6lMYdtyIMCgETpkMeQiSJRT9WeN+MHloiweVZtLaFLc
+        Wjs/vcGJ6kExu4gMIZwAKBKt9FO84/MeeX5xixCbPsWV/ie0qllY22Ui1CjZGdRiXxX0eQLwubjS
+        8MpoqlHOS4trKunOry5Gvupohqfo9R61aBiOwMXZu+kbhEtPhhz0xNgqh64lfww9uiAOnuMJMDlp
+        +/ty3uIeW2dp/g/9EXCOeiO/zHeG3euWj8cSfSmX++1jL2MuBVdKacOOlsaU8io81TiE0cIr3alR
+        t6xZGkp94tHH635Z3/ira7zFhasmz5N/AQAA//8DAAhvMU7VBgAA
+    headers:
+      CF-RAY:
+      - 93402233baf00110-GRU
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Mon, 21 Apr 2025 21:56:56 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - crewai-iuxna1
+      openai-processing-ms:
+      - '6058'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999832'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_f5273114a4a797fd0928674edb442194
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
+      personal goal is: Test Goal\nTo give my best complete final answer to the task
+      respond using the exact following format:\n\nThought: I now can give a great
+      answer\nFinal Answer: Your final answer must be the great and the most complete
+      as possible, it must be outcome described.\n\nI MUST use these formats, my job
+      depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
+      is the expected criteria for your final answer: Output\nyou MUST return the
+      actual complete content as the final answer, not a summary.\n\nThis is the context
+      you''re working with:\n### Previous attempt failed validation: bad result\n\n\n###
+      Previous result:\nTest tasks are designed to evaluate specific skills or knowledge
+      in a controlled environment. The criteria for evaluating success include clarity
+      of instructions, alignment with learning objectives, and the level of engagement
+      they elicit from participants. Specifically, a test task should include a clear
+      and concise prompt that outlines what is expected from the participants. Additionally,
+      it should have a well-defined scoring rubric that assesses the quality of responses
+      based on predetermined criteria. By ensuring that the test task is relevant
+      and challenging, it will effectively measure the participants\u2019 capabilities
+      and provide valuable insights into their understanding of the subject matter.
+      Effective preparation, testing methods, and feedback mechanisms are essential
+      to the success of any test task, while also maintaining an environment conducive
+      to learning and growth.\n\n\nTry again, making sure to address the validation
+      error.\n\nBegin! This is VERY important to you, use the tools available and
+      give your best Final Answer, your job depends on it!\n\nThought:"}], "model":
+      "gpt-4o-mini", "stop": ["\nObservation:"]}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '1887'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
+        _cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.68.2
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.68.2
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-read-timeout:
+      - '600.0'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.12
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAAwAAAP//jFfbbhtHEn3XVxT4kl2DJGTZlmy9yV4nCBCs5ayxXmAdCMXumpkOe7rG
+        fSFFBAbyG/t7+yWLqh7OUIoC7AsBTnfX5dSpU92/nQEsnF1cw8J0mE0/+NXbD/njD+fv/ee/X92/
+        +9fN839+/Pj57e35hx8ub9+8WSzlBG9+JZOPp9aG+8FTdhzqsomEmcTq86uXry6uLi6fX+pCz5a8
+        HGuHvHrJq94Ft7o4v3i5Or9aPX89nu7YGUqLa/j3GQDAb/orcQZL94trOF8ev/SUEra0uJ42ASwi
+        e/mywJRcyhjyYjkvGg6Zgob+IwTeg8EArdsRILQSNmBIe4oAX8L3LqCHG/1/DZ8oZciYtgkwEqQc
+        i8klkgVMiVLqKeQEaSDjGmfQ+wNYSq4NZCEz0A59wSx+BozZGTdgyN8lSFvnfVrCNvDek21pCRgs
+        CKaUKRhHCfYudy4AgoQf2Xuy0ETsac9xu4ZPHSUaY0sUdwR98dkNnmAoceBEaQkuGF+sCy3kjsBZ
+        ClkDlbIBN5IQhTZ3Sd3vCbeBkp6U/T1hKpEkSdnsCWMQW1yy4V52ySnZOUTeuTQaRSPmceMJGiK7
+        QbNdw5fwJXxiqCwBDEBNQyZLEfIR5CW4DC6BhBCyQy8YGg7JWYqQaEcRPWzpAOQ1qnQtZp+v4dmz
+        dxKdxvOOg3GJ4MdQy+U4pGfPrgUwcCffoC8pw0ariq7tcsNxj9GqEcJ0EO8lWIpCKLuG27mECVLH
+        xVstINA9muwPsO+wxn8/kMlkBYzcUb/UWkolJUjlSOOEIrWl3I60nIJiP+SjafVWvMAlEAtAoHzY
+        4wGyuCLvehcwk5TvANhvXFtcPiyBQiqxll327SgeOJDExkGtJewJBmxpLQheCII33rVBa63R/nSs
+        9odjkBOKU8GOoVoXSREwHCNpyGpDPM2smewApYwb71InlOZYKXSCrsDhEuAUkeZDqaYju+f2k6Qi
+        edphyFq6nlD8NcUvAb3nvTgXLxgAjSlRwntEbYVEexI4zl1Z/UnnO1EQW7t6Q1NjW4XvhcD3PrTY
+        qkGB6Qb25P1qFoPHkAXOwMEfxkxOfG5KBvSJgdTiU8CIfm0I0HSOdmK9i1zaTqOMWLvq2CJLiIR+
+        teconBoGP7b/2L07jI4sfC2UVBQajj3mBH9JxXSAaVKVVZXopSQQj4q5FLgGdWnQg6WepcOqg7+u
+        YQZFY6bQYTAEPWe3qyKkuuO8d6EV6RF8p4QzgZCHUvYHBfqlAP1ZgP3b2EH/MKw8/7lsojNKUJ6Z
+        5g+zBEuJvxb0Lh+k4pHSwEGlDiGNVqJaOZZoQ2BpR54HsiPwDzdwyd6FatpElyk6VKaNTh2HxxLs
+        xWCSAAaKirXgMQmGHkYFXr4cqSnEqRIxuZljzBFDGjDSyH/DfV+CVLnS9ZQ/KiB2p04zQ1uc1fCd
+        VJEGrJVTrF8J1j/XxjJUlbVD7ym09KdCsCEwGKkpMglNxCZPLTP1aOY/9Pt3ikB0FAzVUTQSVWZI
+        5lMGJ0MBo+O0hhtrnU4a7w86OsYghkipgiFmIg/RCQEUekHUHNMY1TEYLhFbSoquElkm71ZqJrEM
+        kTee+lViv5NvIm1cMvCO4r4j3x+L+6BTBcNLFYZp0N3OEKvh78fpKHDeRidCqPPOyoQabZ4MR7T0
+        Val8UqoT4HUGW/qTQdWhXHeMGXssUuISjdCf7qkfPMZREHgYOOYSXJZLiPCntjet4ZZTXtVYDB8n
+        6W6e8iLFJhYjk1uYfIIHtJH3MgODPZmpLrQnGi2jssbWeCV/OFLzpFXEgI4UaCL3Ovd6ufFtqUJ+
+        pZcBxVBCex92LnI4yvJPKFpSbzc0L0lb7DtnuoeQSz5o5b6aMsmlb8SyYfkvukEhkd6jsKF8GPGj
+        IEr2dBkaIi8d2nDMR3bTvTA2jckKt9ou1zYYR8A0THvU7vfLibVKUhg4OS3FNG3nhlJY3h5EiDgO
+        LLypYKfTIfEw8Xk6jQNzGlSUHtD8v7//BwwOuHG+EmaaXnprdZuSK4tqchxaFu9TmJKkDt6j1Eo4
+        VerGW6YWQTihev+AOyqjj4J5eKPFSJiUi66X/tBsl6p7eisocpmfoxnweLBOKvWht04PZEsdnOin
+        C/D69JUhspdQXjqheH+ygCFwriNR3je/jCvfpheN51Y0Jj06umiEed2d5MBBXi8p87DQ1W9nAL/o
+        y6k8eAwt6h3yLvOW1N2LizfV3mJ+sM2rr168GFczZ/TzwuvLi+UTBu8sZXQ+nTy+FgZNR3Y+Or/U
+        sFjHJwtnJ2n/MZynbNfUXWj/H/PzgjE0ZLJ3QyTrzMOU522RftWp+/S2CWYNeCFPLGfoLjuKUgpL
+        DRZfn5mLdEiZ+rvGhZbiEF19azbDXXNlX13ia7wwi7NvZ/8DAAD//wMA87a9+nkPAAA=
+    headers:
+      CF-RAY:
+      - 9340225b9bca0110-GRU
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Mon, 21 Apr 2025 21:57:05 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - crewai-iuxna1
+      openai-processing-ms:
+      - '9141'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999564'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_0fc29337116c1d19a0543dfe5b0db291
+    status:
+      code: 200
+      message: OK
+version: 1
--- a/tests/test_task_guardrails.py
+++ b/tests/test_task_guardrails.py
@@ -1,11 +1,16 @@
-"""Tests for task guardrails functionality."""
-
-from unittest.mock import Mock
+from unittest.mock import Mock, patch

 import pytest

-from crewai.task import Task
+from crewai import Agent, Task
+from crewai.llm import LLM
+from crewai.tasks.guardrail_task import GuardrailTask
 from crewai.tasks.task_output import TaskOutput
+from crewai.utilities.events import (
+    GuardrailTaskCompletedEvent,
+    GuardrailTaskStartedEvent,
+)
+from crewai.utilities.events.crewai_event_bus import crewai_event_bus


 def test_task_without_guardrail():
@@ -22,7 +27,7 @@ def test_task_without_guardrail():
    assert result.raw == "test result"


-def test_task_with_successful_guardrail():
+def test_task_with_successful_guardrail_func():
    """Test that successful guardrail validation passes transformed result."""

    def guardrail(result: TaskOutput):
@@ -127,3 +132,190 @@ def test_guardrail_error_in_context():

    assert "Task failed guardrail validation" in str(exc_info.value)
    assert "Expected JSON, got string" in str(exc_info.value)
+
+
+@pytest.fixture
+def sample_agent():
+    """Return an Agent with a fixed role, goal and backstory for the tests below."""
+    # NOTE(review): these three strings appear verbatim in the system prompt of
+    # the recorded cassette requests; changing them presumably requires
+    # re-recording the cassettes - confirm before editing.
+    return Agent(role="Test Agent", goal="Test Goal", backstory="Test Backstory")
+
+
+@pytest.mark.vcr(filter_headers=["authorization"])
+def test_guardrail_using_llm(sample_agent):
+    """Test that a string guardrail and a GuardrailTask guardrail are each called twice.
+
+    ``GuardrailTask.__call__`` is patched in both halves of the test, so the
+    guardrail logic itself never runs; only the number of invocations is
+    checked.
+    """
+    # First half: the guardrail is given as a plain description string.
+    task = Task(
+        description="Test task",
+        expected_output="Output",
+        guardrail="Ensure the output is equal to 'good result'",
+    )
+
+    # The patched guardrail reports a failure first and a success second, so
+    # two calls are expected in total for one execution of the task.
+    with patch(
+        "crewai.tasks.guardrail_task.GuardrailTask.__call__",
+        side_effect=[(False, "bad result"), (True, "good result")],
+    ) as mock_guardrail:
+        task.execute_sync(agent=sample_agent)
+
+    assert mock_guardrail.call_count == 2
+
+    # Second half: the same task, now with an explicit GuardrailTask that
+    # carries its own LLM.
+    task.guardrail = GuardrailTask(
+        description="Ensure the output is equal to 'good result'",
+        llm=LLM(model="gpt-4o-mini"),
+    )
+
+    # A fresh patch is used, so the call count starts again from zero.
+    with patch(
+        "crewai.tasks.guardrail_task.GuardrailTask.__call__",
+        side_effect=[(False, "bad result"), (True, "good result")],
+    ) as mock_guardrail:
+        task.execute_sync(agent=sample_agent)
+
+    assert mock_guardrail.call_count == 2
+
+
+@pytest.fixture
+def task_output():
+    """Return a TaskOutput with fixed raw text, description, expected output and agent."""
+    return TaskOutput(
+        raw="Test output",
+        description="Test task",
+        expected_output="Output",
+        agent="Test Agent",
+    )
+
+
+def test_guardrail_task_initialization_no_llm(task_output):
+    """Test that building and calling a GuardrailTask without an LLM raises ValueError."""
+    # Construction and the call both sit inside the ``raises`` block, so this
+    # test does not distinguish which of the two steps raises.
+    with pytest.raises(ValueError, match="Provide a valid LLM to the GuardrailTask"):
+        GuardrailTask(description="Test")(task_output)
+
+
+@pytest.fixture
+def mock_llm():
+    """Return a Mock restricted to the LLM spec whose ``call`` returns a fixed code snippet."""
+    llm = Mock(spec=LLM)
+    # The returned text is a small Python program that builds a
+    # (bool, value) tuple named ``result`` and prints it.
+    # NOTE(review): presumably this stands in for the validation code the
+    # guardrail asks the LLM to generate - confirm against GuardrailTask.
+    llm.call.return_value = """
+output = 'Sample book data'
+if isinstance(output, str):
+    result = (True, output)
+else:
+    result = (False, 'Invalid output format')
+print(result)
+"""
+    return llm
+
+
+@pytest.mark.parametrize(
+    "tool_run_output",
+    [
+        # Interpreter output is the string form of a success tuple.
+        {
+            "output": "(True, 'Valid output')",
+            "expected_result": True,
+            "expected_output": "Valid output",
+        },
+        # Interpreter output is the string form of a failure tuple.
+        {
+            "output": "(False, 'Invalid output format')",
+            "expected_result": False,
+            "expected_output": "Invalid output format",
+        },
+        # Free-form error text: expected back unchanged, flagged as a failure.
+        {
+            "output": "Something went wrong while running the code, Invalid output format",
+            "expected_result": False,
+            "expected_output": "Something went wrong while running the code, Invalid output format",
+        },
+        # Another free-form message: also expected back unchanged as a failure.
+        {
+            "output": "No result variable found",
+            "expected_result": False,
+            "expected_output": "No result variable found",
+        },
+        # Interpreter output is already a tuple object rather than a string.
+        {
+            "output": (False, "Invalid output format"),
+            "expected_result": False,
+            "expected_output": "Invalid output format",
+        },
+    ],
+)
+@patch("crewai_tools.CodeInterpreterTool.run")
+def test_guardrail_task_execute_code(mock_run, mock_llm, tool_run_output, task_output):
+    """Test that GuardrailTask maps the code interpreter's output to a (success, value) pair.
+
+    Each parametrized case supplies the value ``CodeInterpreterTool.run`` is
+    mocked to return, together with the two tuple elements the guardrail is
+    expected to produce from it.
+    """
+    # ``mock_run`` is the patched CodeInterpreterTool.run injected by @patch.
+    mock_run.return_value = tool_run_output["output"]
+
+    guardrail = GuardrailTask(description="Test validation", llm=mock_llm)
+
+    result = guardrail(task_output)
+    assert result[0] == tool_run_output["expected_result"]
+    assert result[1] == tool_run_output["expected_output"]
+
+
+@patch("crewai_tools.CodeInterpreterTool.run")
+def test_guardrail_using_additional_instructions(mock_run, mock_llm, task_output):
+    """Test that ``additional_instructions`` given to GuardrailTask reach the LLM call."""
+    mock_run.return_value = "(True, 'Valid output')"
+    additional_instructions = (
+        "This is an additional instruction created by the user follow it strictly"
+    )
+    guardrail = GuardrailTask(
+        description="Test validation",
+        llm=mock_llm,
+        additional_instructions=additional_instructions,
+    )
+
+    guardrail(task_output)
+
+    # The check is on the string form of the most recent ``llm.call``
+    # arguments, so it passes wherever in those arguments the text appears.
+    assert additional_instructions in str(mock_llm.call.call_args)
+
+
+@pytest.mark.vcr(filter_headers=["authorization"])
+def test_guardrail_emits_events(sample_agent):
+    """Test that a started and a completed guardrail event are emitted for each attempt.
+
+    The guardrail is patched to fail once and then succeed, and the payloads
+    collected from both event types are compared with the expected sequence.
+    """
+    # Payloads captured by the handlers below, in the order they are received.
+    started_guardrail = []
+    completed_guardrail = []
+
+    # NOTE(review): presumably handlers registered inside this context are
+    # dropped when it exits - confirm against crewai_event_bus.
+    with crewai_event_bus.scoped_handlers():
+
+        @crewai_event_bus.on(GuardrailTaskStartedEvent)
+        def handle_guardrail_started(source, event):
+            # Keep only the fields the assertions below compare.
+            started_guardrail.append(
+                {"guardrail": event.guardrail, "retry_count": event.retry_count}
+            )
+
+        @crewai_event_bus.on(GuardrailTaskCompletedEvent)
+        def handle_guardrail_completed(source, event):
+            completed_guardrail.append(
+                {
+                    "success": event.success,
+                    "result": event.result,
+                    "error": event.error,
+                    "retry_count": event.retry_count,
+                }
+            )
+
+        task = Task(
+            description="Test task",
+            expected_output="Output",
+            guardrail="Ensure the output is equal to 'good result'",
+        )
+
+        # First guardrail call reports failure, second reports success.
+        with patch(
+            "crewai.tasks.guardrail_task.GuardrailTask.__call__",
+            side_effect=[(False, "bad result"), (True, "good result")],
+        ):
+            task.execute_sync(agent=sample_agent)
+
+        # One started event per attempt, carrying the guardrail description
+        # and a retry counter that starts at 0.
+        expected_started_events = [
+            {
+                "guardrail": "Ensure the output is equal to 'good result'",
+                "retry_count": 0,
+            },
+            {
+                "guardrail": "Ensure the output is equal to 'good result'",
+                "retry_count": 1,
+            },
+        ]
+        # The failed attempt carries only ``error``; the successful one
+        # carries only ``result``.
+        expected_completed_events = [
+            {
+                "success": False,
+                "result": None,
+                "error": "bad result",
+                "retry_count": 0,
+            },
+            {
+                "success": True,
+                "result": "good result",
+                "error": None,
+                "retry_count": 1,
+            },
+        ]
+        assert started_guardrail == expected_started_events
+        assert completed_guardrail == expected_completed_events