feat: support to define a guardrail task no-code

2026-01-11 00:58:30 +00:00 · 2025-04-21 18:59:56 -03:00
parent 685d20f46c
commit 91b618b4e0
9 changed files with 1307 additions and 15 deletions
--- a/docs/concepts/tasks.mdx
+++ b/docs/concepts/tasks.mdx
@@ -322,6 +322,14 @@ blog_task = Task(
   - On success: it returns a tuple of `(bool, Any)`. For example: `(True, validated_result)` 
   - On failure: it returns a tuple of `(bool, str)`. For example: `(False, "Error message explaining the failure")`
 ### GuardrailTask
 The `GuardrailTask` class provides a sophisticated way to generate and execute validation code for task outputs. Here's how it works:
 #### Code Execution
 The generated code can be executed in two ways: inside a Docker container (the default, recommended) or directly in the current environment (unsafe mode).
 ### Error Handling Best Practices
 1. **Structured Error Responses**:
@@ -750,6 +758,8 @@ Task guardrails provide a powerful way to validate, transform, or filter task ou
 ### Basic Usage
 #### Define your own logic to validate
 ```python Code
 from typing import Tuple, Union
 from crewai import Task
@@ -769,6 +779,34 @@ task = Task(
 )
 ```
 #### Leverage a no-code approach for validation
 ```python Code
 from crewai import Task
 task = Task(
    description="Generate JSON data",
    expected_output="Valid JSON object",
    guardrail="Ensure the response is a valid JSON object"
 )
 ```
 #### Use custom models for code generation
 ```python Code
 from crewai import Task
 from crewai.llm import LLM
 from crewai.tasks.guardrail_task import GuardrailTask
 task = Task(
    description="Generate JSON data",
    expected_output="Valid JSON object",
    guardrail=GuardrailTask(
        description="Ensure the response is a valid JSON object",
        llm=LLM(model="gpt-4o-mini"),
    )
 )
 ```
 ### How Guardrails Work
 1. **Optional Attribute**: Guardrails are an optional attribute at the task level, allowing you to add validation only where needed.
--- a/src/crewai/task.py
+++ b/src/crewai/task.py
@@ -140,7 +140,7 @@ class Task(BaseModel):
        default=None,
    )
    processed_by_agents: Set[str] = Field(default_factory=set)
-    guardrail: Optional[Callable[[TaskOutput], Tuple[bool, Any]]] = Field(
+    guardrail: Optional[Union[Callable[[TaskOutput], Tuple[bool, Any]], str]] = Field(
        default=None,
        description="Function to validate task output before proceeding to next task",
    )
@@ -157,8 +157,12 @@ class Task(BaseModel):
    @field_validator("guardrail")
    @classmethod
-    def validate_guardrail_function(cls, v: Optional[Callable]) -> Optional[Callable]:
+    def validate_guardrail_function(
-        """Validate that the guardrail function has the correct signature and behavior.
+        cls, v: Optional[str | Callable]
    ) -> Optional[str | Callable]:
        """
        If v is a callable, validate that the guardrail function has the correct signature and behavior.
        If v is a string, return it as is.
        While type hints provide static checking, this validator ensures runtime safety by:
        1. Verifying the function accepts exactly one parameter (the TaskOutput)
@@ -171,16 +175,16 @@ class Task(BaseModel):
        - Clear error messages help users debug guardrail implementation issues
        Args:
-            v: The guardrail function to validate
+            v: The guardrail function to validate or a string describing the guardrail task
        Returns:
-            The validated guardrail function
+            The validated guardrail function or a string describing the guardrail task
        Raises:
            ValueError: If the function signature is invalid or return annotation
                       doesn't match Tuple[bool, Any]
        """
-        if v is not None:
+        if v is not None and callable(v):
            sig = inspect.signature(v)
            positional_args = [
                param
@@ -408,9 +412,7 @@ class Task(BaseModel):
            )
            if self.guardrail:
-                guardrail_result = GuardrailResult.from_tuple(
+                guardrail_result = self._process_guardrail(task_output)
                    self.guardrail(task_output)
                )
                if not guardrail_result.success:
                    if self.retry_count >= self.max_retries:
                        raise Exception(
@@ -464,13 +466,52 @@ class Task(BaseModel):
                    )
                )
                self._save_file(content)
-            crewai_event_bus.emit(self, TaskCompletedEvent(output=task_output, task=self))
+            crewai_event_bus.emit(
                self, TaskCompletedEvent(output=task_output, task=self)
            )
            return task_output
        except Exception as e:
            self.end_time = datetime.datetime.now()
            crewai_event_bus.emit(self, TaskFailedEvent(error=str(e), task=self))
            raise e  # Re-raise the exception after emitting the event
    def _process_guardrail(self, task_output: TaskOutput) -> GuardrailResult:
        """Run the configured guardrail on ``task_output`` and report it on the event bus.

        A ``GuardrailTaskStartedEvent`` is emitted before the guardrail runs and a
        ``GuardrailTaskCompletedEvent`` after its result has been normalised.
        A string guardrail is wrapped in a ``GuardrailTask`` bound to this task;
        any other guardrail is called directly with the task output.

        Args:
            task_output: The output produced by this task.

        Returns:
            The guardrail outcome built with ``GuardrailResult.from_tuple``.

        Raises:
            ValueError: If no guardrail is configured on the task.
        """
        guardrail = self.guardrail
        if guardrail is None:
            raise ValueError("Guardrail is not set")

        # Imported at call time, as the original does.
        from crewai.utilities.events import (
            GuardrailTaskCompletedEvent,
            GuardrailTaskStartedEvent,
        )
        from crewai.utilities.events.crewai_event_bus import crewai_event_bus

        started_event = GuardrailTaskStartedEvent(
            guardrail=guardrail, retry_count=self.retry_count
        )
        crewai_event_bus.emit(self, started_event)

        if isinstance(guardrail, str):
            # Textual guardrail: delegate to the LLM-backed GuardrailTask.
            from crewai.tasks.guardrail_task import GuardrailTask

            validator = GuardrailTask(description=guardrail, task=self)
        else:
            validator = guardrail

        outcome = GuardrailResult.from_tuple(validator(task_output))

        completed_event = GuardrailTaskCompletedEvent(
            success=outcome.success,
            result=outcome.result,
            error=outcome.error,
            retry_count=self.retry_count,
        )
        crewai_event_bus.emit(self, completed_event)
        return outcome
    def prompt(self) -> str:
        """Prompt the task.
--- a/src/crewai/tasks/guardrail_task.py
+++ b/src/crewai/tasks/guardrail_task.py
@@ -0,0 +1,154 @@
 from typing import Any, Tuple
 from crewai.llm import LLM
 from crewai.task import Task
 from crewai.tasks.task_output import TaskOutput
 from crewai.utilities.printer import Printer
 class GuardrailTask:
    """Validate the output of another task using LLM-generated Python code.

    An LLM is prompted to write Python code that checks a task output against
    ``description``. The code is then run through ``CodeInterpreterTool`` and
    its result is returned as a ``(bool, Any)`` guardrail tuple.

    Args:
        description (str): The description of the validation criteria.
        task (Task, optional): The task whose output needs validation. When
            ``llm`` is not given, ``task.agent.llm`` is used as a fallback.
        llm (LLM, optional): The language model to use for code generation.
        unsafe_mode (bool, optional): Forwarded unchanged to
            ``CodeInterpreterTool``. Defaults to ``False``.
        additional_instructions (str, optional): Additional instructions
            appended to the system prompt.

    Note:
        A missing LLM is not rejected by the constructor; ``generate_code``
        raises ``ValueError`` when no LLM is available.
    """

    def __init__(
        self,
        description: str,
        task: Task | None = None,
        llm: LLM | None = None,
        unsafe_mode: bool = False,
        additional_instructions: str = "",
    ):
        self.description = description
        self.unsafe_mode: bool = unsafe_mode

        # Fall back to the LLM of the task's agent when none is passed in.
        # Each hop tolerates a missing/None link and ends up as None.
        agent = getattr(task, "agent", None)
        fallback_llm: LLM | None = getattr(agent, "llm", None)
        self.llm: LLM | None = llm or fallback_llm

        self.additional_instructions = additional_instructions

    @property
    def system_instructions(self) -> str:
        """System instructions for the LLM code generation.

        Returns:
            str: Complete system instructions including security constraints,
            followed by ``additional_instructions``.
        """
        # Every bullet ends with "\n" so adjacent literals do not fuse into a
        # single run-on line in the prompt.
        security_instructions = (
            "- DO NOT wrap the output in markdown or use triple backticks. Return only raw Python code.\n"
            "- DO NOT use `exec`, `eval`, `compile`, `open`, `os`, `subprocess`, `socket`, `shutil`, or any other system-level modules.\n"
            "- Your code must not perform any file I/O, shell access, or dynamic code execution."
        )
        return (
            "You are an expert Python developer. "
            "You **must strictly** follow the task description, use the provided raw output as the input in your code. "
            "Your code must:\n"
            "- Return results with: print((True, data)) on success, or print((False, 'very detailed error message')) on failure. Make sure the final output is being assigned to 'result' variable.\n"
            "- Use the literal string of the task output (already included in your input) if needed.\n"
            "- Generate the code **following strictly** the task description.\n"
            "- Be valid Python 3 — executable as-is.\n"
            f"{security_instructions}\n"
            "Additional instructions (do not override the previous instructions):\n"
            f"{self.additional_instructions}"
        )

    def user_instructions(self, task_output: TaskOutput) -> str:
        """Build the user message for the LLM code generation.

        Args:
            task_output (TaskOutput): The output to be validated; only its
                ``raw`` attribute is read.

        Returns:
            str: Instructions for generating validation code.
        """
        return (
            "Based on the task description below, generate Python 3 code that validates the task output. \n"
            "Task description:\n"
            f"{self.description}\n"
            "Here is the raw output from the task: \n"
            f"'{task_output.raw}' \n"
            "Use this exact string literal inside your generated code (do not reference variables like task_output.raw).\n"
            "Now generate Python code that follows the instructions above."
        )

    def generate_code(self, task_output: TaskOutput) -> str:
        """Ask the LLM for Python code that validates the task output.

        The generated code is also printed through ``Printer``.

        Args:
            task_output (TaskOutput): The output to be validated.

        Returns:
            str: Generated Python code for validation.

        Raises:
            ValueError: If no LLM was provided or could be derived from the task.
        """
        if self.llm is None:
            raise ValueError("Provide a valid LLM to the GuardrailTask")

        response = self.llm.call(
            messages=[
                {
                    "role": "system",
                    "content": self.system_instructions,
                },
                {
                    "role": "user",
                    "content": self.user_instructions(task_output=task_output),
                },
            ]
        )

        printer = Printer()
        printer.print(
            content=f"The following code was generated for the guardrail task:\n{response}\n",
            color="cyan",
        )
        return response

    def __call__(self, task_output: TaskOutput) -> Tuple[bool, Any]:
        """Generate the validation code, execute it, and return its verdict.

        Args:
            task_output (TaskOutput): The output to be validated.

        Returns:
            Tuple[bool, Any]: A tuple containing:
                - bool: True if validation passed, False otherwise
                - Any: The validation result or error message

            Interpreter error output, or output that cannot be parsed as a
            Python literal, yields ``(False, <message>)`` instead of raising.
        """
        import ast

        from crewai_tools import CodeInterpreterTool

        code = self.generate_code(task_output)
        result = CodeInterpreterTool(code=code, unsafe_mode=self.unsafe_mode).run()

        error_messages = (
            "Something went wrong while running the code",
            # when running in unsafe mode, the final output should be stored in the result variable
            "No result variable found",
        )

        if isinstance(result, str):
            # Substring matching is only meaningful on textual output.
            if any(msg in result for msg in error_messages):
                return False, result
            try:
                result = ast.literal_eval(result)
            except (
                ValueError,
                TypeError,
                SyntaxError,
                MemoryError,
                RecursionError,
            ) as exc:
                # Output that is not a Python literal is a failed validation
                # with a readable reason, not a crash of the whole task.
                return False, f"Could not parse the guardrail result {result!r}: {exc}"

        return result
--- a/src/crewai/utilities/events/init.py
+++ b/src/crewai/utilities/events/init.py
@@ -9,6 +9,10 @@ from .crew_events import (
    CrewTestCompletedEvent,
    CrewTestFailedEvent,
 )
 from .guardrail_task_events import (
    GuardrailTaskCompletedEvent,
    GuardrailTaskStartedEvent,
 )
 from .agent_events import (
    AgentExecutionStartedEvent,
    AgentExecutionCompletedEvent,
--- a/src/crewai/utilities/events/event_types.py
+++ b/src/crewai/utilities/events/event_types.py
@@ -23,6 +23,10 @@ from .flow_events import (
    MethodExecutionFinishedEvent,
    MethodExecutionStartedEvent,
 )
 from .guardrail_task_events import (
    GuardrailTaskCompletedEvent,
    GuardrailTaskStartedEvent,
 )
 from .llm_events import (
    LLMCallCompletedEvent,
    LLMCallFailedEvent,
@@ -68,4 +72,6 @@ EventTypes = Union[
    LLMCallCompletedEvent,
    LLMCallFailedEvent,
    LLMStreamChunkEvent,
    GuardrailTaskStartedEvent,
    GuardrailTaskCompletedEvent,
 ]
--- a/src/crewai/utilities/events/guardrail_task_events.py
+++ b/src/crewai/utilities/events/guardrail_task_events.py
@@ -0,0 +1,28 @@
 from typing import Any, Callable, Optional, Union
 from pydantic import BaseModel
 from crewai.utilities.events.base_events import BaseEvent
 class GuardrailTaskStartedEvent(BaseEvent):
    """Event emitted when a guardrail task starts

    Attributes:
        type: Event type identifier; defaults to "guardrail_task_started".
        guardrail: The guardrail about to run, either a string description
            or a callable.
        retry_count: Retry counter supplied by the emitter.
    """
    type: str = "guardrail_task_started"
    guardrail: Union[str, Callable]
    retry_count: int
 class GuardrailTaskCompletedEvent(BaseEvent):
    """Event emitted when a guardrail task completes

    Attributes:
        type: Event type identifier; defaults to "guardrail_task_completed".
        success: Whether the guardrail reported success.
        result: The value produced by the guardrail.
        error: Optional error message; defaults to None.
        retry_count: Retry counter supplied by the emitter.
    """
    type: str = "guardrail_task_completed"
    success: bool
    result: Any
    error: Optional[str] = None
    retry_count: int
--- a/tests/cassettes/test_guardrail_emits_events.yaml
+++ b/tests/cassettes/test_guardrail_emits_events.yaml
@@ -0,0 +1,307 @@
 interactions:
 - request:
    body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
      personal goal is: Test Goal\nTo give my best complete final answer to the task
      respond using the exact following format:\n\nThought: I now can give a great
      answer\nFinal Answer: Your final answer must be the great and the most complete
      as possible, it must be outcome described.\n\nI MUST use these formats, my job
      depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
      is the expected criteria for your final answer: Output\nyou MUST return the
      actual complete content as the final answer, not a summary.\n\nBegin! This is
      VERY important to you, use the tools available and give your best Final Answer,
      your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"]}'
    headers:
      accept:
      - application/json
      accept-encoding:
      - gzip, deflate, zstd
      connection:
      - keep-alive
      content-length:
      - '807'
      content-type:
      - application/json
      cookie:
      - __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
        _cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
      host:
      - api.openai.com
      user-agent:
      - OpenAI/Python 1.68.2
      x-stainless-arch:
      - arm64
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
      - MacOS
      x-stainless-package-version:
      - 1.68.2
      x-stainless-raw-response:
      - 'true'
      x-stainless-read-timeout:
      - '600.0'
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.11.12
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: !!binary |
        H4sIAAAAAAAAAwAAAP//jFfNjiPHDb7PUxA6LiRhZ3Z2xpnbOHCQWSNY2J4gQbLGgKpid9NTzWoX
        qyRrjQX2IXzJ6+2TBKzu1s9mDrlIrWYVi/zI7yvq9wuABfvFHSxch9n1Q1h9+z7/8MM/dv/8E3H5
        fqdvdLh5//3mXx+/+/av/XeLpe2Im1/I5XnX2sV+CJQ5ymh2iTCTeb28vX57dXt1c3VTDX30FGxb
        O+TVdVz1LLy6en11vXp9u7r8ZtrdRXakizv49wUAwO/10+IUT78t7uD1cn7Tkyq2tLg7LAJYpBjs
        zQJVWTNKXiyPRhclk9TQH0DiDhwKtLwlQGgtbEDRHSWAD/IXFgxwX3/fwb0CwiNphvuWJC/hATrc
        EmyIBDLqM3nYce5gSHHLnqUFhAkZgkQ6RFGC3GGGnigr5I6AfhvIZfLgEmdKjNDEBLHkoeQ1PHaU
        qImJlmBPwOOmpoQAUyb1zFgyaOl7TPwRrQ5L8JSRA3lAhUS/Fk7k1x/kgzxI9ZEIQw+xgUyaWdol
        DJgyuxIwhT1obPIOEx3NSltKGGqg7Owh9kMUkqz2SMCSIwwB9zVuaIp47EkyBsj7gfTkMGBxoXiC
        Tclgp0jMELhnQyLHO4vzcg2vXv1dOFfQWdpXr+7gsWOdUWDhzBhg6FAJdiNA4nnLvpyHFxP00Zcw
        xdDRMb05RfLAAqwxVPzWUE82i0JHYQASLWkqH6HrKlyzuyHFNmFvOTvbboikRC6HPUQBthh2soRd
        x64DZK92msPsOkNjU1oFqrjzWB1PWwpxMPjMuSPVWrwrA+VBMrWpxnmKzX2TKUGZ465F45OlM/ZN
        DCHudD2COaLXRFfU4BHYUuJmbwtHkDMlrEnBhvLOmn2LiWPRrxDWvWbqFVD8CNboArP52cMupmfI
        saXcUbKmNM/ip6Z8Y3n9VD2cpvReHB3CsDyMKMdTT/hXS7icgjhps20MW1LYYmCPec7qQEsL1ti0
        Pz3h0BxDir64PCE1doCOOfEpiXUgxw2Tn5nW1/DMuaWtlu6B6CyAlX2rXUzBsNpyimJbKhTXBsW9
        czRktPRf6v79YAwM+1ENqkyNdTyhmJ3OCkVLXekwJYvQpGKzBxK/Kkqpls4FtoBH3rYRQz0kzqiN
        JSiKGw6c9xNoY6djfTOzakQfW2TRDJuiLKR6hsoaTnKbY3VRGk69HvrlWAO27ej3VRc9DSHuKyss
        iGMFLJUvn/+jIER+pMpjhAadRTxloMfzJkqRAjUNucxbCvvlobFzjEGrNIQqei35io15qCa7NCYJ
        q1q1spKVHPvaP1PF4NHW1rrVTYGfCX6iQMKlr+nsaAM4DIEdjqoRE7yrwmPWd7jFczMaccdeT7VA
        OJ0JiQbKbHkccrQrSWfJYbE7eUqYHZO4sY6JfHE0gjhEVZ5qHBvoSo8ClFJM6znJev/9DQXbWs2v
        U9Ri8qbw7uHHe0vGlv+IHCqWBrhUxbbsckL3XOM0l1VASXUJvTk/vHcW83KKdIgpzxb7jiXbxaOH
        6P4cJbMUK+GZRr5Qh3ckzyyTthslRyRngZgaxGBIRWQ+tTL5VPjPdJoHCiy0/Er+UPbgOpSWFHr0
        NGu8i542RloDxx1CN1ZXNZtvaxet07Te6rmLKZa2O1a5G3/PrasZW9Ivn/+we+BM/2dxHNHEAwm/
        fP7DxX7DMs8vlX7ZpLBlZ8yyZKvEYeCP9YKOQZcQcD+NI9Gu+gr1SNLAdolICx233erXMorETOk1
        PORzMZ0QN62tk0CUsH/xLq3jQtA4Md9YPwnryBAbYrZzDXc4ioYWZ7VsSgA22a8zyUQo8aCYWat4
        zzJ4Oi8maoqizaxSQjgxoEic3Nik+vNk+XSYTUNshxQ3+tXWRcPC2j0ZHaPYHKo5Dotq/XQB8HOd
        gcvZWLsYUuyH/JTjM9XjLt++Gf0tjqP30Xr99nay5pgxHA03l9PofO7waZwV9WSMXjh0Hfnj1uPM
        jcVzPDFcnKT9v+G85HtMnaX9f9wfDbVjyT8NiTy785SPyxL9Uu/Zl5cdYK4BL5TSlh09ZaZkpfDU
        YAnjH4bFSJenhqWlNCQe/zU0w1Nz69/e4Dd45RYXny7+CwAA//8DAOzQwR9DDQAA
    headers:
      CF-RAY:
      - 93402298d9980110-GRU
      Connection:
      - keep-alive
      Content-Encoding:
      - gzip
      Content-Type:
      - application/json
      Date:
      - Mon, 21 Apr 2025 21:57:12 GMT
      Server:
      - cloudflare
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - nosniff
      access-control-expose-headers:
      - X-Request-ID
      alt-svc:
      - h3=":443"; ma=86400
      cf-cache-status:
      - DYNAMIC
      openai-organization:
      - crewai-iuxna1
      openai-processing-ms:
      - '6385'
      openai-version:
      - '2020-10-01'
      strict-transport-security:
      - max-age=31536000; includeSubDomains; preload
      x-ratelimit-limit-requests:
      - '30000'
      x-ratelimit-limit-tokens:
      - '150000000'
      x-ratelimit-remaining-requests:
      - '29999'
      x-ratelimit-remaining-tokens:
      - '149999832'
      x-ratelimit-reset-requests:
      - 2ms
      x-ratelimit-reset-tokens:
      - 0s
      x-request-id:
      - req_2a19c29e1e9dd766289937937418044a
    status:
      code: 200
      message: OK
 - request:
    body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
      personal goal is: Test Goal\nTo give my best complete final answer to the task
      respond using the exact following format:\n\nThought: I now can give a great
      answer\nFinal Answer: Your final answer must be the great and the most complete
      as possible, it must be outcome described.\n\nI MUST use these formats, my job
      depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
      is the expected criteria for your final answer: Output\nyou MUST return the
      actual complete content as the final answer, not a summary.\n\nThis is the context
      you''re working with:\n### Previous attempt failed validation: bad result\n\n\n###
      Previous result:\nAs a Test Agent, I have been tasked with providing a complete
      response that meets the expected criteria for output. Therefore, here is the
      full content without summarization, detailed as required.\n\nIn the realm of
      testing, particularly software testing, several critical components come into
      play. The fundamental types of testing include but are not limited to:\n\n1.
      **Unit Testing**: This is the initial phase where individual components or modules
      of the software are tested in isolation. Unit tests help ensure that each part
      of the program functions correctly on its own, which aids in catching bugs early
      in the development process.\n\n2. **Integration Testing**: After unit testing,
      integration testing follows. This phase focuses on verifying the interaction
      between various components or systems and ensuring that they work together as
      intended.\n\n3. **System Testing**: Once the integrated components have been
      tested, system testing involves validating the complete and fully integrated
      software product. This ensures that it meets the specified requirements and
      works as expected in a real-world environment.\n\n4. **Acceptance Testing**:
      This is typically the final phase of testing and is usually carried out by end-users
      or clients. The goal is to validate the usability and functionality of the system
      against business requirements. Acceptance testing confirms that the software
      is ready for deployment and meets the user\u2019s needs.\n\nTo facilitate these
      testing processes effectively, various tools are leveraged. These tools can
      include:\n\n- **Automated Testing Tools**: Tools like Selenium for web applications
      or JUnit for Java applications allow testers to automate repetitive testing
      tasks, which increases efficiency and reduces the possibility of human error.\n\n-
      **Test Management Tools**: Tools such as JIRA or TestRail are essential for
      tracking test progress, managing test cases, and reporting testing outcomes.\n\n-
      **Continuous Integration Tools**: Tools like Jenkins help in automating the
      process of running tests as part of the development pipeline, ensuring that
      any changes made in the codebase are continuously tested.\n\nIn conclusion,
      thorough testing through various stages\u2014unit, integration, system, and
      acceptance\u2014combined with the strategic use of specialized tools, lays the
      foundation for delivering high-quality software. It ensures that the product
      not only functions correctly but also meets user expectations, paving the way
      for successful implementations and satisfied clients.\n\n\nTry again, making
      sure to address the validation error.\n\nBegin! This is VERY important to you,
      use the tools available and give your best Final Answer, your job depends on
      it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"]}'
    headers:
      accept:
      - application/json
      accept-encoding:
      - gzip, deflate, zstd
      connection:
      - keep-alive
      content-length:
      - '3539'
      content-type:
      - application/json
      cookie:
      - __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
        _cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
      host:
      - api.openai.com
      user-agent:
      - OpenAI/Python 1.68.2
      x-stainless-arch:
      - arm64
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
      - MacOS
      x-stainless-package-version:
      - 1.68.2
      x-stainless-raw-response:
      - 'true'
      x-stainless-read-timeout:
      - '600.0'
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.11.12
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: !!binary |
        H4sIAAAAAAAAA3RXTY/cuBG9768ojA9ZG90Ne3YSB3ObDJxgjMTe2G0ESHxhkyWpdiiWzI9uy/vn
        gypKavXYexn0SGSx+F69V6XffwK4Ind1C1e2M9n2g9/+7X3+93+af7rxtw+PN2OPx5tCzc2bb+Px
        v+/91UZ28OE3tHnetbPcDx4zcaivbUSTUaK+en3z5+vX13/55Vpf9OzQy7Z2yNsb3vYUaHv98vpm
        +/L19tVfp90dk8V0dQv/+wkA4Hf9K3kGh1+vbuHlZn7SY0qmxavbZRHAVWQvT65MSpSyCflqc35p
        OWQMmvoDBD6BNQFaOiIYaCVtMCGdMAJ8Dn+nYDzc6f+38BAgdwgRje+BG0jc5JOJCBlTptBuwIAA
        EbHDkDTiMEQ2tgNKgClhyGQ8ZAYMqcjGTo4bBk/WCHgJesQMX4rxlEeQ3J2JLoEJDpriG/IeSsII
        +HVAm+umHew7hIa95xOFFrhkTwGTZtuU4EyPIRuvyXHAkJOkL2+fXgE8NWhH63ED2A+dSfRNHh9N
        JC4JPB7R193TBslMIq2vxz5ByeTpG8rLyKXtuGRdN0S2mNLuc/gcnj17BvtxQA34cU5lXyPLglc7
        ePHiU6A8P3zx4hb2HSXI44DrNLKJLeYEFBwdyRXj5ea2gsoReswduwQnyh1VHi07PJiET/lAIazI
        oQPGhmOfwKQJcHQ7eMhCp2JLMWUQrCUVhw2GhGBaQyFlOJQ2bQCDOXjJ0Al0PGBMcqA12XaAMXJM
        cMCG9fQVIUPkNgqoutybjFHqocW0A0VEbp5AuRsHssb7EUzJ3IvwoCQ5s4mmxxPHxwSeHhHe6s6G
        I7w1RyO4vFue7N692W/AzEX0pZB9hCNGaqbiBFeiQt3hfBmpqwtGr4Wwh5CxjXXPire7Ru5Q5txV
        MJT/lMDGYidd6HkjdHwCR02DUQ7o2RWPwm3GaGxWEoEDggmcO4w7WB+5VKaAJ/idOpRVUv4HCujW
        OhBwIHM7r4gRbfbjBshJOTejRNKDG2NROba56pFSKpqVIjInJxkcMJ8Qgz6TrISPQFmFSglEVqi2
        Q+HI/ohw4NxBb4KUrYQ+87jUN9ou0Bc5sFJbVaas/sop9yYoi3e/Pix7OCr87/5xZnztNUrYL0LY
        xzFl7L/TWM1zyjGdU1HtqN+jW0cUmRg4dexxB1PMeRMejS8mT6aEwW0zbzE4SHVdGtAulaaySXO1
        mXypjAN2RvJZiRKMjZzSYlPJYpCflSjLwdHklA8ZMEj2Wh2LR0y4Bw7b1aNzmQbri5N0JkswweIG
        SjIHEqfe6O6EtkTx7QUphraYaEJGPF9E6irifHFxBMUl9cy586MUFOVa7cGhAwxHihxEa0rZjVB2
        Zy0OWdJY03bPwRUF5OwJh1Hhlq6hTpiyecSOvcOYNmDOceasHWaMvTaQWTmKf81XGlQlcbXVRsoY
        yUy6kBbpRq06h4PnUXOvRdXQCtm5xBIMdGRpUbKnmsCPyZ+aYIKDyACTnPWlUMRe5Szna38MiC7t
        4J77Xgpz6sMqV6ES4ZOs+h5F+PnT3f65xnmvzNRK+NHC93f750sbe7O0v/n9XgQqr/cM2DRkCUP2
        o8jctNXsFxSqg6LwoTWMedQBQ0RhpjaqehcIsBdI0d1K8K3UwuIWF2er59ZXIk46G62AjKEzwc56
        TgOKioKDiJ6mql71110NCanYToT3ET0GKr2GOuHhcoiZ7QsiDpgpyyik7WoDp448rs3rbhgkjrjh
        AYU7pzF7PshCMwxnFSZqg3qE4kj9EPmoFygxmMhFJhHqsVaBsbZEY8fdjJJgA/9S8LVtLRjtz8m8
        ffhwN9vmB0Ne8ZaRIg0Ykjl41OQ4tibUwWjxt6bhWB0eE8LgTa6jQ47GPuqypaVvlhqQp1YU4NgW
        HdIEwOomjbHCgsBo2Xtz4Km/mZ71YCNi7A8q48UurTdqQQqBp1Zt4+kM9qTuFojuOWQKRRz0oofP
        SP3B+5/vH55XSjdLgbzF8Eh19LqnaD3eP2zOdbFOghudxMB2JrQTeSuN0OqgKV+xtFhCmPFPdWQT
        7msoKXXue8o6r9Vepr3ZInToh3Q5f4cRAp7UeXNkV6yOCE5YwQSBMxwimkfAr1RTPrcI9f6UI5re
        U/jRdCQDRuP5tDjFPYsDJeKg5jA10/M3wx+P5BedSwaKzRqdzeTQm7n8n/j6pqKUMKpoDDi9jc0w
        Yq7ji+UQaj8dShxYmz849CSOHFroqO22y8fJlKeYbBk8ujqWqZtkaWkt2YvpgJsfzTW1bFQJ/Vmc
        iX2ZJgG5zf3DslCH/461ns/fV5XVeWabjMxOv0cYOE/uPA1t08S97k6ffCbJTYY/bGTMm21L9a0F
        k37QkaQ+OIixa2dcjQ8XnelQMuBXK23p+y+4Oj6UYRAPEQ1JmTfFT9NBDcclW+5xniIdeWpX8zd4
        M9bG3KoX6mQrblUd3V9+W7hi/w8AAP//jJhBbsQgDEX3OUXEASp12s70MiPkgkndMoDALHP3CsIE
        0s6i6weOfyKI/+emZkFXR5BZ5cT+VnwGMCXThtnS2v6zba09jYY6oskJiql32doBgHO+CSxW/trI
        upt365cQ/Uf6tVUYcpQ+ZURI3hWjntgHUek6zfO1hgT54PtFiP4WWLL/xvq480sLCUTPJgb6eqdc
        xo4Onk+XOzlUlBoZyKYhaBCqTBS67+2pBGRNfgDToPtvP49qb9rJLf8p30E99ahliKhJHTX3ZRG/
        6jF/vGx/z7VhUS8MhZIJY/kWGg1ku0UqYrtypCG3YAyRtlzFBGku+u0M73BSYlqnHwAAAP//AwDv
        DD9WZRIAAA==
    headers:
      CF-RAY:
      - 934022c27c860110-GRU
      Connection:
      - keep-alive
      Content-Encoding:
      - gzip
      Content-Type:
      - application/json
      Date:
      - Mon, 21 Apr 2025 21:57:22 GMT
      Server:
      - cloudflare
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - nosniff
      access-control-expose-headers:
      - X-Request-ID
      alt-svc:
      - h3=":443"; ma=86400
      cf-cache-status:
      - DYNAMIC
      openai-organization:
      - crewai-iuxna1
      openai-processing-ms:
      - '9187'
      openai-version:
      - '2020-10-01'
      strict-transport-security:
      - max-age=31536000; includeSubDomains; preload
      x-ratelimit-limit-requests:
      - '30000'
      x-ratelimit-limit-tokens:
      - '150000000'
      x-ratelimit-remaining-requests:
      - '29999'
      x-ratelimit-remaining-tokens:
      - '149999158'
      x-ratelimit-reset-requests:
      - 2ms
      x-ratelimit-reset-tokens:
      - 0s
      x-request-id:
      - req_94bb40dead4c4e9c7fa12de3bfb636b7
    status:
      code: 200
      message: OK
 version: 1
--- a/tests/cassettes/test_guardrail_using_llm.yaml
+++ b/tests/cassettes/test_guardrail_using_llm.yaml
@@ -0,0 +1,522 @@
 interactions:
 - request:
    body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
      personal goal is: Test Goal\nTo give my best complete final answer to the task
      respond using the exact following format:\n\nThought: I now can give a great
      answer\nFinal Answer: Your final answer must be the great and the most complete
      as possible, it must be outcome described.\n\nI MUST use these formats, my job
      depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
      is the expected criteria for your final answer: Output\nyou MUST return the
      actual complete content as the final answer, not a summary.\n\nBegin! This is
      VERY important to you, use the tools available and give your best Final Answer,
      your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"]}'
    headers:
      accept:
      - application/json
      accept-encoding:
      - gzip, deflate, zstd
      connection:
      - keep-alive
      content-length:
      - '807'
      content-type:
      - application/json
      host:
      - api.openai.com
      user-agent:
      - OpenAI/Python 1.68.2
      x-stainless-arch:
      - arm64
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
      - MacOS
      x-stainless-package-version:
      - 1.68.2
      x-stainless-raw-response:
      - 'true'
      x-stainless-read-timeout:
      - '600.0'
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.11.12
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: !!binary |
        H4sIAAAAAAAAAwAAAP//jFTBbuQ2DL3PVxA6e4KJm+zszq0ttkBORYuiPbSLASPRNndlSRHpmcwu
        8u+FZCcz2ebQi2H78ZGPj6K+rQAMO7MDYwdUOya//ulX/a3tf/+Z3f3jX/xxdGGDfz60p7uvx+mj
        aQoj3n8mq8+sKxvH5Ek5hhm2mVCpZL3e3ty22/bdpq3AGB35QuuTrm/ieuTA63bT3qw32/X1+4U9
        RLYkZgd/rwAAvtVn0RkcPZodbJrnPyOJYE9m9xIEYHL05Y9BERbFoKY5gzYGpVCl30GIR7AYoOcD
        AUJfZAMGOVIG+Cf8wgE9/Fi/d/DHQKAoXwAVBgwOMj1MnEkAoRiQaaAgNVNw4EiRPZUoSTEIgQ6o
        MBKpgA4E9JjIKjmwmZUyI3QxQ5w0TXoFdwG6yXfsPYcedGB5LjdS0AZYgQVIhIIyetAINh4oA3oP
        mTwdMCiglBpSBaUcD+wIllmVl+pEAxRkynMZ1KqNQxfziGWgpYz1hLkpBMtCTU2H1k4ZlfwJ0Llc
        lMx9BSInELv6Ufy6Ks5l6mKmBu7gyN6XNj0HWgi2TDGfqrQYKKg0gK6QSl8YTtBP7Kgw5LkPN8uo
        2umsvKsjm10s0nWIOU79UIOP5P06ZSqmlQRH1iFOCnFk1WIAjynmcmKW8QnEDDKNI2b+Ojv0ypzS
        GgtgSjmiHebe+gkzBqVyogbuh/XDhJ719N1JEFSWjhfXFkPccixqdrm6PLiZukmwLE+YvL8AMIS4
        EMrKfFqQp5cl8bFPOd7Ld1TTcWAZ9plQYigLIRqTqejTCuBTXcbp1X6ZlOOYdK/xC9Vy17c/zPnM
        +Q64QK8/LKhGRX8G2m3bvJFwv9h+sc/Goh3Inann5cfJcbwAVhdt/1fOW7nn1jn0/yf9GbCWkpLb
        p0yO7euWz2GZPtcFfzvsxeYq2AjlA1vaK1Muo3DU4eTnm8vISZTGfcehp5wyz9dXl/bd1t2+w/fY
        WrN6Wv0LAAD//wMAAfXtOswFAAA=
    headers:
      CF-RAY:
      - 934022059c2c0110-GRU
      Connection:
      - keep-alive
      Content-Encoding:
      - gzip
      Content-Type:
      - application/json
      Date:
      - Mon, 21 Apr 2025 21:56:45 GMT
      Server:
      - cloudflare
      Set-Cookie:
      - __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
        path=/; expires=Mon, 21-Apr-25 22:26:45 GMT; domain=.api.openai.com; HttpOnly;
        Secure; SameSite=None
      - _cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000;
        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - nosniff
      access-control-expose-headers:
      - X-Request-ID
      alt-svc:
      - h3=":443"; ma=86400
      cf-cache-status:
      - DYNAMIC
      openai-organization:
      - crewai-iuxna1
      openai-processing-ms:
      - '2377'
      openai-version:
      - '2020-10-01'
      strict-transport-security:
      - max-age=31536000; includeSubDomains; preload
      x-ratelimit-limit-requests:
      - '30000'
      x-ratelimit-limit-tokens:
      - '150000000'
      x-ratelimit-remaining-requests:
      - '29999'
      x-ratelimit-remaining-tokens:
      - '149999832'
      x-ratelimit-reset-requests:
      - 2ms
      x-ratelimit-reset-tokens:
      - 0s
      x-request-id:
      - req_f39581c88a83855cf77c06098b787948
    status:
      code: 200
      message: OK
 - request:
    body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
      personal goal is: Test Goal\nTo give my best complete final answer to the task
      respond using the exact following format:\n\nThought: I now can give a great
      answer\nFinal Answer: Your final answer must be the great and the most complete
      as possible, it must be outcome described.\n\nI MUST use these formats, my job
      depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
      is the expected criteria for your final answer: Output\nyou MUST return the
      actual complete content as the final answer, not a summary.\n\nThis is the context
      you''re working with:\n### Previous attempt failed validation: bad result\n\n\n###
      Previous result:\nThe task at hand requires a comprehensive and detailed response
      that meets the expected criteria for output. In fulfilling this requirement,
      it is essential to cover all relevant aspects and provide complete content,
      ensuring that the information is clear, concise, and accurately addresses the
      needs of the task. Therefore, I will outline the necessary components, adhere
      to any guidelines provided, and ensure that the final output is thorough and
      well-presented, without omitting important details or summarizing the information.
      This approach will guarantee a high-quality response that satisfies the outlined
      expectations.\n\n\nTry again, making sure to address the validation error.\n\nBegin!
      This is VERY important to you, use the tools available and give your best Final
      Answer, your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop":
      ["\nObservation:"]}'
    headers:
      accept:
      - application/json
      accept-encoding:
      - gzip, deflate, zstd
      connection:
      - keep-alive
      content-length:
      - '1619'
      content-type:
      - application/json
      cookie:
      - __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
        _cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
      host:
      - api.openai.com
      user-agent:
      - OpenAI/Python 1.68.2
      x-stainless-arch:
      - arm64
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
      - MacOS
      x-stainless-package-version:
      - 1.68.2
      x-stainless-raw-response:
      - 'true'
      x-stainless-read-timeout:
      - '600.0'
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.11.12
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: !!binary |
        H4sIAAAAAAAAA4xWTW8cNwy9+1cQczTWi83GjhPf3CQFjKJI27hoizowuBJnhrZGUkVq15sg/72Q
        ZvbDaQ69GOshRb1HPpL6cgLQsG2uoDE9qhmiO/vhg/56YR5uPy4uzfvFm5//ePfnb8vPf70y4Z+f
        PjSzciKsHsjo7tTchCE6Ug5+NJtEqFSivrg8v1heLl8tLqphCJZcOdZFPTsPZwN7Plsuludni8uz
        F6+n031gQ9Jcwd8nAABf6t+C01t6aq5gMdt9GUgEO2qu9k4ATQqufGlQhEXRazM7GE3wSr5CvwEf
        NmDQQ8drAoSuwAb0sqEEcOd/ZI8Oruv/V3DbEyjKI6BCj96CJ0MirKgkgKB9SCF3PRSbJUV2ZCGR
        xOCFQAOQl5wIarYYvSHYsPagPQE9RTJKFkxipcQIbUgQssas83p1TDxg2sKY+gKYpQSNKazZEvig
        8JBFAUHyUF1XWWvwqTzlRyUPiVypTzmuE6sZoLWp8PEdoHPFh9boFVAKNKm00PaUiocGQL/dXW6h
        y2zJsSeZw52/87cB0PRMawLtWWb1njY4FzbleEEUPHkV2LBzsCJgb1wuodhX513irmq8F3M4Pb3x
        moLNpgjt9PQK3jrC5LZgqWVP9VTMKQYhCO2eWQUesrqdj3DnuWVTKxBaKMDXIy2syBL15KVqotZ+
        fueh6GFZQLzbVfbao9sKS0Hy/gmHEp7Q9FPC9hBCZLMXh9vOCtOQYkio5coWjcoMLCrOKtR94g0K
        gWi2TAIhAT1hqWOtuuQYQ9JaKSkKHEoy5/C+AvCdIxiKGFZVWi6ksdo7sSC02TnI3lIqLWILkgmw
        5KowGFC1cr/zLwvxtw4T67Zi/CWRYZnK8LsQmFKKaorVRODQdxm7kvLKPExiPG4IE/yatsC+DWnA
        Ulagth3l7bZzuF4HtoDDiru8u3pqIu1Rx3THwF5LM2BSNnlUNntA2OB29GMBQtmOV071tZXZeWUW
        ivZ2dD7W7uHPo1geaQst1wSNLcBeuOtVwCbceGhTGKrjvuPHfI9sjjUwg0SVqakN1BPwUGq4k2H9
        cpSJmEjIFzL1WpWdMgwdN26lcVFoXFvL5SQ6+I0k5GSoivOmBYzRscGVo9mu0SBRS4m8GcXl2D9W
        ZbU5aU8JEmFVxYB1HjkZU1nG5V5FhwvrZHnSEmlKELCfUI5CErAsJovQmPnbURZ1PIBoykZzUSnG
        mAKafgY342zoMib0SkW1B/EULAORSr1iam87zdGaQIFNz44mtGN3Jy6Bd6Ua+60PjkXZwJpp83xw
        jIN+Djc6QimNXSeHc1vAKs6YaM0hC0TWFp0TkFxnAKyxy0dtMGrn21FcB2Oi1XbU9YhSUFnKXAip
        ig/dbhMcb7JEbRYs29Rn544M6H2YUlB26KfJ8nW/NV3oYgor+eZo07Jn6e8ToQRfNqRoiE21fj0B
        +FS3c362cJuYwhD1XsMj1euWb5ZjvObwKDhYXy5eT1YNiu5geLWYlvrzgPdjqeRowTcGTU/2cPTw
        GsBsORwZTo5o/xfO92KP1Nl3/yf8wWAMRSV7HxNZNs8pH9wSPdQd/323fZor4EYordnQvTKlUgpL
        LWY3PmUa2YrScN+y7yjFxON7po337aW9eIWvcWmak68n/wIAAP//AwA0/RJL3QkAAA==
    headers:
      CF-RAY:
      - 93402216690b0110-GRU
      Connection:
      - keep-alive
      Content-Encoding:
      - gzip
      Content-Type:
      - application/json
      Date:
      - Mon, 21 Apr 2025 21:56:49 GMT
      Server:
      - cloudflare
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - nosniff
      access-control-expose-headers:
      - X-Request-ID
      alt-svc:
      - h3=":443"; ma=86400
      cf-cache-status:
      - DYNAMIC
      openai-organization:
      - crewai-iuxna1
      openai-processing-ms:
      - '4451'
      openai-version:
      - '2020-10-01'
      strict-transport-security:
      - max-age=31536000; includeSubDomains; preload
      x-ratelimit-limit-requests:
      - '30000'
      x-ratelimit-limit-tokens:
      - '150000000'
      x-ratelimit-remaining-requests:
      - '29999'
      x-ratelimit-remaining-tokens:
      - '149999631'
      x-ratelimit-reset-requests:
      - 2ms
      x-ratelimit-reset-tokens:
      - 0s
      x-request-id:
      - req_8383a16d5f5b7f53d659bebf481ba936
    status:
      code: 200
      message: OK
 - request:
    body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
      personal goal is: Test Goal\nTo give my best complete final answer to the task
      respond using the exact following format:\n\nThought: I now can give a great
      answer\nFinal Answer: Your final answer must be the great and the most complete
      as possible, it must be outcome described.\n\nI MUST use these formats, my job
      depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
      is the expected criteria for your final answer: Output\nyou MUST return the
      actual complete content as the final answer, not a summary.\n\nBegin! This is
      VERY important to you, use the tools available and give your best Final Answer,
      your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"]}'
    headers:
      accept:
      - application/json
      accept-encoding:
      - gzip, deflate, zstd
      connection:
      - keep-alive
      content-length:
      - '807'
      content-type:
      - application/json
      cookie:
      - __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
        _cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
      host:
      - api.openai.com
      user-agent:
      - OpenAI/Python 1.68.2
      x-stainless-arch:
      - arm64
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
      - MacOS
      x-stainless-package-version:
      - 1.68.2
      x-stainless-raw-response:
      - 'true'
      x-stainless-read-timeout:
      - '600.0'
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.11.12
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: !!binary |
        H4sIAAAAAAAAA4xU227bRhB911cM+CwJtnyN3xygBoI+xG1doGgTCKPdITnRcpbdGUoRAgP9jf5e
        v6TYpWTJjR8KEATIs3PmdvZ8mwBU7Ks7qFyL5ro+zN5/tJ/urz7cPjziw68ff3y6vXz8vb1/fLyw
        n29/q6Y5Iq6+kLND1NzFrg9kHGWEXSI0yqznN5dXi5vF9flZAbroKeSwprfZZZx1LDxbnC0uZ2c3
        s/PbfXQb2ZFWd/DHBADgW3nnOsXT1+oOClf505EqNlTdvRwCqFIM+U+FqqyGYtX0CLooRlJK/wAS
        t+BQoOENAUKTywYU3VIC+CQPLBjgvnzfwROpgaGuFTAReFJuhDxYBNpgGNAItCfHNTvQNYegEBOs
        JW4D+YaABRBy/hRDIA8kG05ROhKbw1NL4BIbJUaoYzpwsjSgg3OkCiwuDJ7ABUxsO4g1sKilweXR
        6xQwcFP4YMvWQiBMkgnGdfGG8hnxYC1BoA2FTEHSYEMlylraAQV2bFCn2EGPydhxj2I6h1/23WEI
        uykg2GEioG0cgn8pEMHl3CWXi+JYCfoUuz6nQIM4WGAhhW3+YgX62pMz8mPWXN7rzPfec+5xzMx2
        SNhiWdyWQph5qjnvQ11Muek0rBK7MSGqUn4K9Z8Dhv34EmkfJQMrVPIQBfpEnoxSV8gOK5nD+x2Q
        6FCoC2emOk6AFRIF2qDY2HaLIZA0LE0peMshANX1uIewg45Qh0TfNfvPX3+Dwx5XHNiYtLD1KW7Y
        ExRNrELWknLTWtaExczBCQbxlLLgfdl5Xah1KLuHDs0ozeGHQwm50R4T5rFOSyM5qiNro9/LpCby
        K3Rr6Mi1KKzdKP08SjHGAGPuF4HGGlB2x6lMYdtyIMCgETpkMeQiSJRT9WeN+MHloiweVZtLaFLc
        Wjs/vcGJ6kExu4gMIZwAKBKt9FO84/MeeX5xixCbPsWV/ie0qllY22Ui1CjZGdRiXxX0eQLwubjS
        8MpoqlHOS4trKunOry5Gvupohqfo9R61aBiOwMXZu+kbhEtPhhz0xNgqh64lfww9uiAOnuMJMDlp
        +/ty3uIeW2dp/g/9EXCOeiO/zHeG3euWj8cSfSmX++1jL2MuBVdKacOOlsaU8io81TiE0cIr3alR
        t6xZGkp94tHH635Z3/ira7zFhasmz5N/AQAA//8DAAhvMU7VBgAA
    headers:
      CF-RAY:
      - 93402233baf00110-GRU
      Connection:
      - keep-alive
      Content-Encoding:
      - gzip
      Content-Type:
      - application/json
      Date:
      - Mon, 21 Apr 2025 21:56:56 GMT
      Server:
      - cloudflare
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - nosniff
      access-control-expose-headers:
      - X-Request-ID
      alt-svc:
      - h3=":443"; ma=86400
      cf-cache-status:
      - DYNAMIC
      openai-organization:
      - crewai-iuxna1
      openai-processing-ms:
      - '6058'
      openai-version:
      - '2020-10-01'
      strict-transport-security:
      - max-age=31536000; includeSubDomains; preload
      x-ratelimit-limit-requests:
      - '30000'
      x-ratelimit-limit-tokens:
      - '150000000'
      x-ratelimit-remaining-requests:
      - '29999'
      x-ratelimit-remaining-tokens:
      - '149999832'
      x-ratelimit-reset-requests:
      - 2ms
      x-ratelimit-reset-tokens:
      - 0s
      x-request-id:
      - req_f5273114a4a797fd0928674edb442194
    status:
      code: 200
      message: OK
 - request:
    body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
      personal goal is: Test Goal\nTo give my best complete final answer to the task
      respond using the exact following format:\n\nThought: I now can give a great
      answer\nFinal Answer: Your final answer must be the great and the most complete
      as possible, it must be outcome described.\n\nI MUST use these formats, my job
      depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
      is the expected criteria for your final answer: Output\nyou MUST return the
      actual complete content as the final answer, not a summary.\n\nThis is the context
      you''re working with:\n### Previous attempt failed validation: bad result\n\n\n###
      Previous result:\nTest tasks are designed to evaluate specific skills or knowledge
      in a controlled environment. The criteria for evaluating success include clarity
      of instructions, alignment with learning objectives, and the level of engagement
      they elicit from participants. Specifically, a test task should include a clear
      and concise prompt that outlines what is expected from the participants. Additionally,
      it should have a well-defined scoring rubric that assesses the quality of responses
      based on predetermined criteria. By ensuring that the test task is relevant
      and challenging, it will effectively measure the participants\u2019 capabilities
      and provide valuable insights into their understanding of the subject matter.
      Effective preparation, testing methods, and feedback mechanisms are essential
      to the success of any test task, while also maintaining an environment conducive
      to learning and growth.\n\n\nTry again, making sure to address the validation
      error.\n\nBegin! This is VERY important to you, use the tools available and
      give your best Final Answer, your job depends on it!\n\nThought:"}], "model":
      "gpt-4o-mini", "stop": ["\nObservation:"]}'
    headers:
      accept:
      - application/json
      accept-encoding:
      - gzip, deflate, zstd
      connection:
      - keep-alive
      content-length:
      - '1887'
      content-type:
      - application/json
      cookie:
      - __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
        _cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
      host:
      - api.openai.com
      user-agent:
      - OpenAI/Python 1.68.2
      x-stainless-arch:
      - arm64
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
      - MacOS
      x-stainless-package-version:
      - 1.68.2
      x-stainless-raw-response:
      - 'true'
      x-stainless-read-timeout:
      - '600.0'
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.11.12
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: !!binary |
        H4sIAAAAAAAAAwAAAP//jFfbbhtHEn3XVxT4kl2DJGTZlmy9yV4nCBCs5ayxXmAdCMXumpkOe7rG
        fSFFBAbyG/t7+yWLqh7OUIoC7AsBTnfX5dSpU92/nQEsnF1cw8J0mE0/+NXbD/njD+fv/ee/X92/
        +9fN839+/Pj57e35hx8ub9+8WSzlBG9+JZOPp9aG+8FTdhzqsomEmcTq86uXry6uLi6fX+pCz5a8
        HGuHvHrJq94Ft7o4v3i5Or9aPX89nu7YGUqLa/j3GQDAb/orcQZL94trOF8ev/SUEra0uJ42ASwi
        e/mywJRcyhjyYjkvGg6Zgob+IwTeg8EArdsRILQSNmBIe4oAX8L3LqCHG/1/DZ8oZciYtgkwEqQc
        i8klkgVMiVLqKeQEaSDjGmfQ+wNYSq4NZCEz0A59wSx+BozZGTdgyN8lSFvnfVrCNvDek21pCRgs
        CKaUKRhHCfYudy4AgoQf2Xuy0ETsac9xu4ZPHSUaY0sUdwR98dkNnmAoceBEaQkuGF+sCy3kjsBZ
        ClkDlbIBN5IQhTZ3Sd3vCbeBkp6U/T1hKpEkSdnsCWMQW1yy4V52ySnZOUTeuTQaRSPmceMJGiK7
        QbNdw5fwJXxiqCwBDEBNQyZLEfIR5CW4DC6BhBCyQy8YGg7JWYqQaEcRPWzpAOQ1qnQtZp+v4dmz
        dxKdxvOOg3GJ4MdQy+U4pGfPrgUwcCffoC8pw0ariq7tcsNxj9GqEcJ0EO8lWIpCKLuG27mECVLH
        xVstINA9muwPsO+wxn8/kMlkBYzcUb/UWkolJUjlSOOEIrWl3I60nIJiP+SjafVWvMAlEAtAoHzY
        4wGyuCLvehcwk5TvANhvXFtcPiyBQiqxll327SgeOJDExkGtJewJBmxpLQheCII33rVBa63R/nSs
        9odjkBOKU8GOoVoXSREwHCNpyGpDPM2smewApYwb71InlOZYKXSCrsDhEuAUkeZDqaYju+f2k6Qi
        edphyFq6nlD8NcUvAb3nvTgXLxgAjSlRwntEbYVEexI4zl1Z/UnnO1EQW7t6Q1NjW4XvhcD3PrTY
        qkGB6Qb25P1qFoPHkAXOwMEfxkxOfG5KBvSJgdTiU8CIfm0I0HSOdmK9i1zaTqOMWLvq2CJLiIR+
        teconBoGP7b/2L07jI4sfC2UVBQajj3mBH9JxXSAaVKVVZXopSQQj4q5FLgGdWnQg6WepcOqg7+u
        YQZFY6bQYTAEPWe3qyKkuuO8d6EV6RF8p4QzgZCHUvYHBfqlAP1ZgP3b2EH/MKw8/7lsojNKUJ6Z
        5g+zBEuJvxb0Lh+k4pHSwEGlDiGNVqJaOZZoQ2BpR54HsiPwDzdwyd6FatpElyk6VKaNTh2HxxLs
        xWCSAAaKirXgMQmGHkYFXr4cqSnEqRIxuZljzBFDGjDSyH/DfV+CVLnS9ZQ/KiB2p04zQ1uc1fCd
        VJEGrJVTrF8J1j/XxjJUlbVD7ym09KdCsCEwGKkpMglNxCZPLTP1aOY/9Pt3ikB0FAzVUTQSVWZI
        5lMGJ0MBo+O0hhtrnU4a7w86OsYghkipgiFmIg/RCQEUekHUHNMY1TEYLhFbSoquElkm71ZqJrEM
        kTee+lViv5NvIm1cMvCO4r4j3x+L+6BTBcNLFYZp0N3OEKvh78fpKHDeRidCqPPOyoQabZ4MR7T0
        Val8UqoT4HUGW/qTQdWhXHeMGXssUuISjdCf7qkfPMZREHgYOOYSXJZLiPCntjet4ZZTXtVYDB8n
        6W6e8iLFJhYjk1uYfIIHtJH3MgODPZmpLrQnGi2jssbWeCV/OFLzpFXEgI4UaCL3Ovd6ufFtqUJ+
        pZcBxVBCex92LnI4yvJPKFpSbzc0L0lb7DtnuoeQSz5o5b6aMsmlb8SyYfkvukEhkd6jsKF8GPGj
        IEr2dBkaIi8d2nDMR3bTvTA2jckKt9ou1zYYR8A0THvU7vfLibVKUhg4OS3FNG3nhlJY3h5EiDgO
        LLypYKfTIfEw8Xk6jQNzGlSUHtD8v7//BwwOuHG+EmaaXnprdZuSK4tqchxaFu9TmJKkDt6j1Eo4
        VerGW6YWQTihev+AOyqjj4J5eKPFSJiUi66X/tBsl6p7eisocpmfoxnweLBOKvWht04PZEsdnOin
        C/D69JUhspdQXjqheH+ygCFwriNR3je/jCvfpheN51Y0Jj06umiEed2d5MBBXi8p87DQ1W9nAL/o
        y6k8eAwt6h3yLvOW1N2LizfV3mJ+sM2rr168GFczZ/TzwuvLi+UTBu8sZXQ+nTy+FgZNR3Y+Or/U
        sFjHJwtnJ2n/MZynbNfUXWj/H/PzgjE0ZLJ3QyTrzMOU522RftWp+/S2CWYNeCFPLGfoLjuKUgpL
        DRZfn5mLdEiZ+rvGhZbiEF19azbDXXNlX13ia7wwi7NvZ/8DAAD//wMA87a9+nkPAAA=
    headers:
      CF-RAY:
      - 9340225b9bca0110-GRU
      Connection:
      - keep-alive
      Content-Encoding:
      - gzip
      Content-Type:
      - application/json
      Date:
      - Mon, 21 Apr 2025 21:57:05 GMT
      Server:
      - cloudflare
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - nosniff
      access-control-expose-headers:
      - X-Request-ID
      alt-svc:
      - h3=":443"; ma=86400
      cf-cache-status:
      - DYNAMIC
      openai-organization:
      - crewai-iuxna1
      openai-processing-ms:
      - '9141'
      openai-version:
      - '2020-10-01'
      strict-transport-security:
      - max-age=31536000; includeSubDomains; preload
      x-ratelimit-limit-requests:
      - '30000'
      x-ratelimit-limit-tokens:
      - '150000000'
      x-ratelimit-remaining-requests:
      - '29999'
      x-ratelimit-remaining-tokens:
      - '149999564'
      x-ratelimit-reset-requests:
      - 2ms
      x-ratelimit-reset-tokens:
      - 0s
      x-request-id:
      - req_0fc29337116c1d19a0543dfe5b0db291
    status:
      code: 200
      message: OK
 version: 1
--- a/tests/test_task_guardrails.py
+++ b/tests/test_task_guardrails.py
@@ -1,11 +1,16 @@
-"""Tests for task guardrails functionality."""
+from unittest.mock import Mock, patch
 from unittest.mock import Mock
 import pytest
-from crewai.task import Task
+from crewai import Agent, Task
 from crewai.llm import LLM
 from crewai.tasks.guardrail_task import GuardrailTask
 from crewai.tasks.task_output import TaskOutput
 from crewai.utilities.events import (
    GuardrailTaskCompletedEvent,
    GuardrailTaskStartedEvent,
 )
 from crewai.utilities.events.crewai_event_bus import crewai_event_bus
 def test_task_without_guardrail():
@@ -22,7 +27,7 @@ def test_task_without_guardrail():
    assert result.raw == "test result"
-def test_task_with_successful_guardrail():
+def test_task_with_successful_guardrail_func():
    """Test that successful guardrail validation passes transformed result."""
    def guardrail(result: TaskOutput):
@@ -127,3 +132,190 @@ def test_guardrail_error_in_context():
    assert "Task failed guardrail validation" in str(exc_info.value)
    assert "Expected JSON, got string" in str(exc_info.value)
@pytest.fixture
def sample_agent():
    """Build the bare-bones Agent shared by the guardrail execution tests."""
    # These values show up verbatim in the prompts recorded in the VCR
    # cassettes ("You are Test Agent. Test Backstory ... Test Goal"), so they
    # must stay exactly as written for the recordings to match.
    agent_fields = {
        "role": "Test Agent",
        "goal": "Test Goal",
        "backstory": "Test Backstory",
    }
    return Agent(**agent_fields)
@pytest.mark.vcr(filter_headers=["authorization"])
def test_guardrail_using_llm(sample_agent):
    """Check the retry loop for text-described and GuardrailTask guardrails.

    The task is executed twice: first with the guardrail given as a plain
    description string, then with an explicit ``GuardrailTask`` carrying its
    own LLM. In both runs ``GuardrailTask.__call__`` is patched to reject the
    first output and accept the second, so it must be invoked exactly twice.
    """
    guardrail_call_target = "crewai.tasks.guardrail_task.GuardrailTask.__call__"

    def count_guardrail_calls(current_task):
        # A fresh patch per run: first verdict fails, second verdict passes.
        verdicts = [(False, "bad result"), (True, "good result")]
        with patch(guardrail_call_target, side_effect=verdicts) as guardrail_mock:
            current_task.execute_sync(agent=sample_agent)
        return guardrail_mock.call_count

    task = Task(
        description="Test task",
        expected_output="Output",
        guardrail="Ensure the output is equal to 'good result'",
    )
    assert count_guardrail_calls(task) == 2

    # Same task, but now the guardrail is an explicit GuardrailTask instance.
    task.guardrail = GuardrailTask(
        description="Ensure the output is equal to 'good result'",
        llm=LLM(model="gpt-4o-mini"),
    )
    assert count_guardrail_calls(task) == 2
@pytest.fixture
def task_output():
    """Return a hand-built TaskOutput to pass straight into a guardrail."""
    output_fields = dict(
        raw="Test output",
        description="Test task",
        expected_output="Output",
        agent="Test Agent",
    )
    return TaskOutput(**output_fields)
 def test_guardrail_task_initialization_no_llm(task_output):
     """A GuardrailTask without an LLM must raise ValueError when built and invoked."""
     expected_message = "Provide a valid LLM to the GuardrailTask"
     with pytest.raises(ValueError, match=expected_message):
         # Construction and invocation both stay inside the context manager,
         # so the test passes regardless of which of the two steps raises.
         guardrail_without_llm = GuardrailTask(description="Test")
         guardrail_without_llm(task_output)
@pytest.fixture
def mock_llm():
    """Provide an LLM stand-in whose ``call`` returns a canned validation script."""
    # The literal below is reproduced exactly as it appears in the source;
    # it is the text handed back in place of real LLM-generated code.
    canned_code = """
 output = 'Sample book data'
 if isinstance(output, str):
    result = (True, output)
 else:
    result = (False, 'Invalid output format')
 print(result)
 """
    # spec=LLM restricts the mock to attributes that exist on the real class.
    fake_llm = Mock(spec=LLM)
    fake_llm.call.return_value = canned_code
    return fake_llm
@pytest.mark.parametrize(
    "tool_run_output",
    [
        # Interpreter prints a success tuple as text.
        dict(
            output="(True, 'Valid output')",
            expected_result=True,
            expected_output="Valid output",
        ),
        # Interpreter prints a failure tuple as text.
        dict(
            output="(False, 'Invalid output format')",
            expected_result=False,
            expected_output="Invalid output format",
        ),
        # Interpreter reports an execution error as free text.
        dict(
            output="Something went wrong while running the code, Invalid output format",
            expected_result=False,
            expected_output="Something went wrong while running the code, Invalid output format",
        ),
        # Interpreter reports that no result variable was produced.
        dict(
            output="No result variable found",
            expected_result=False,
            expected_output="No result variable found",
        ),
        # Interpreter hands back an actual tuple instead of a string.
        dict(
            output=(False, "Invalid output format"),
            expected_result=False,
            expected_output="Invalid output format",
        ),
    ],
)
@patch("crewai_tools.CodeInterpreterTool.run")
def test_guardrail_task_execute_code(mock_run, mock_llm, tool_run_output, task_output):
    """Map each code-interpreter output onto the expected guardrail verdict.

    ``CodeInterpreterTool.run`` is patched to return the parametrized
    ``output``; the guardrail's first element must equal ``expected_result``
    and its second element must equal ``expected_output``.
    """
    mock_run.return_value = tool_run_output["output"]

    verdict = GuardrailTask(description="Test validation", llm=mock_llm)(task_output)

    assert verdict[0] == tool_run_output["expected_result"]
    assert verdict[1] == tool_run_output["expected_output"]
@patch("crewai_tools.CodeInterpreterTool.run")
def test_guardrail_using_additional_instructions(mock_run, mock_llm, task_output):
    """User-supplied additional instructions must appear in the LLM call."""
    extra_guidance = "This is an additional instruction created by the user follow it strictly"
    mock_run.return_value = "(True, 'Valid output')"

    validator = GuardrailTask(
        description="Test validation",
        llm=mock_llm,
        additional_instructions=extra_guidance,
    )
    validator(task_output)

    # call_args captures the arguments of the latest llm.call invocation;
    # stringifying it lets us search positional and keyword arguments alike.
    recorded_call = str(mock_llm.call.call_args)
    assert extra_guidance in recorded_call
@pytest.mark.vcr(filter_headers=["authorization"])
def test_guardrail_emits_events(sample_agent):
    """Verify started/completed guardrail events across one failed attempt and one retry.

    ``GuardrailTask.__call__`` is patched to fail first and succeed second.
    The test then checks that a started event and a completed event were
    captured for retry counts 0 and 1, carrying the guardrail description,
    the success flag, the result and the error.
    """
    guardrail_description = "Ensure the output is equal to 'good result'"
    started_log = []
    completed_log = []

    # Handlers are registered inside the scoped_handlers() context.
    with crewai_event_bus.scoped_handlers():

        @crewai_event_bus.on(GuardrailTaskStartedEvent)
        def handle_guardrail_started(source, event):
            started_log.append(
                dict(guardrail=event.guardrail, retry_count=event.retry_count)
            )

        @crewai_event_bus.on(GuardrailTaskCompletedEvent)
        def handle_guardrail_completed(source, event):
            captured_fields = ("success", "result", "error", "retry_count")
            completed_log.append(
                {field: getattr(event, field) for field in captured_fields}
            )

        task = Task(
            description="Test task",
            expected_output="Output",
            guardrail=guardrail_description,
        )

        # First guardrail verdict rejects the output, second one accepts it.
        verdicts = [(False, "bad result"), (True, "good result")]
        with patch(
            "crewai.tasks.guardrail_task.GuardrailTask.__call__",
            side_effect=verdicts,
        ):
            task.execute_sync(agent=sample_agent)

        expected_started = [
            {"guardrail": guardrail_description, "retry_count": attempt}
            for attempt in (0, 1)
        ]
        expected_completed = [
            dict(success=False, result=None, error="bad result", retry_count=0),
            dict(success=True, result="good result", error=None, retry_count=1),
        ]

        assert started_log == expected_started
        assert completed_log == expected_completed