refactor: simplify TaskGuardrail to use LLM for validation, no code generation

This commit is contained in:
Lucas Gomide
2025-04-29 09:50:03 -03:00
parent e3ab80f517
commit e940ff3cbd
14 changed files with 3883 additions and 4217 deletions
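
In short, the guardrail no longer asks an LLM to write Python that is then executed; it asks an LLM (via a small Agent) to judge the task output directly and return a structured verdict. A minimal usage sketch of the new flow follows; the model name and TaskOutput wiring are assumptions, not from this diff, and TaskGuardrail's own import path is hidden behind the View File control:

from crewai.llm import LLM
from crewai.tasks.task_output import TaskOutput

guardrail = TaskGuardrail(
    description="The summary must be under 100 words and mention pricing",
    llm=LLM(model="gpt-4o-mini"),  # assumed model; any crewai-supported LLM works
)
output = TaskOutput(description="Summarize the plan", raw="Our pricing starts at $10...", agent="writer")
ok, payload = guardrail(output)  # (True, raw output) or (False, feedback string)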

View File

@@ -1,39 +1,40 @@
from typing import Any, Tuple
from typing import Any, Optional, Tuple
from pydantic import BaseModel, Field
from crewai.agent import Agent, LiteAgentOutput
from crewai.llm import LLM
from crewai.task import Task
from crewai.tasks.task_output import TaskOutput
from crewai.utilities.printer import Printer
class TaskGuardrailResult(BaseModel):
    valid: bool = Field(
        description="Whether the task output complies with the guardrail"
    )
    feedback: str | None = Field(
        description="Feedback about the task output if it is not valid",
        default=None,
    )
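
# Illustrative sketch (not part of the diff): the structured verdict the
# guardrail agent returns. The field values below are made up for illustration.
example_fail = TaskGuardrailResult(valid=False, feedback="Total is 120, above the allowed 100")
example_pass = TaskGuardrailResult(valid=True)  # feedback defaults to None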
class TaskGuardrail:
    """A task that validates the output of another task using generated Python code.

    This class generates and executes Python code to validate task outputs based on
    specified criteria. It uses an LLM to generate the validation code and provides
    safety guardrails for code execution.
    The code is executed in a Docker container if available; otherwise it is executed in a sandboxed environment.
    If unsafe mode is enabled, the code is executed in the current environment.
    This class is used to validate the output from a Task based on specified criteria.
    It uses an LLM to validate the output and provides feedback if the output is not valid.

    Args:
        description (str): The description of the validation criteria.
        task (Task, optional): The task whose output needs validation.
        llm (LLM, optional): The language model to use for code generation.
        additional_instructions (str, optional): Additional instructions for the guardrail task.
        unsafe_mode (bool, optional): Whether to run the code in unsafe mode.

    Raises:
        ValueError: If no valid LLM is provided.
    """
    generated_code: str = ""

    def __init__(
        self,
        description: str,
        task: Task | None = None,
        llm: LLM | None = None,
        additional_instructions: str = "",
        unsafe_mode: bool = False,
    ):
        self.description = description
@@ -47,84 +48,36 @@ class TaskGuardrail:
        )
        self.llm: LLM | None = llm or fallback_llm
        self.additional_instructions = additional_instructions
        self.unsafe_mode = unsafe_mode
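
    # Construction sketch (illustrative, not part of the diff; the model name
    # is an assumption):
    # guardrail = TaskGuardrail(
    #     description="The answer must be valid JSON with a 'total' field under 100",
    #     llm=LLM(model="gpt-4o-mini"),
    # )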
    def _validate_output(self, task_output: TaskOutput) -> LiteAgentOutput:
        agent = Agent(
            role="Guardrail Agent",
            goal="Validate the output of the task",
            backstory="You are an expert at validating the output of a task, providing effective feedback if the output is not valid.",
            llm=self.llm,
        )

        query = f"""
        Ensure the following task result complies with the given guardrail.

        Task result:
        {task_output.raw}

        Guardrail:
        {self.description}

        Your task:
        - Confirm if the Task result complies with the guardrail.
        - If not, provide clear feedback explaining what is wrong (e.g., by how much it violates the rule, or what specific part fails).
        - Focus only on identifying issues — do not propose corrections.
        - If the Task result complies with the guardrail, say that it is valid.
        """

        result = agent.kickoff(query, response_format=TaskGuardrailResult)
        return result

    @property
    def system_instructions(self) -> str:
        """System instructions for the LLM code generation.

        Returns:
            str: Complete system instructions including security constraints.
        """
        security_instructions = (
            "- DO NOT wrap the output in markdown or use triple backticks. Return only raw Python code.\n"
            "- DO NOT use `exec`, `eval`, `compile`, `open`, `os`, `subprocess`, `socket`, `shutil`, or any other system-level modules.\n"
            "- Your code must not perform any file I/O, shell access, or dynamic code execution."
        )
        return (
            "You are an expert Python developer. "
            "You **must strictly** follow the task description and use the provided raw output as the input in your code. "
            "Your code must:\n"
            "- Return results with: print((True, data)) on success, or print((False, 'very detailed error message')) on failure. Make sure the final output is assigned to the 'result' variable.\n"
            "- Use the literal string of the task output (already included in your input) if needed.\n"
            "- Generate the code **following strictly** the task description.\n"
            "- Be valid Python 3 — executable as-is.\n"
            f"{security_instructions}\n"
            "Additional instructions (do not override the previous instructions):\n"
            f"{self.additional_instructions}"
        )

    def user_instructions(self, task_output: TaskOutput) -> str:
        """Generates user instructions for the LLM code generation.

        Args:
            task_output (TaskOutput): The output to be validated.

        Returns:
            str: Instructions for generating validation code.
        """
        return (
            "Based on the task description below, generate Python 3 code that validates the task output.\n"
            "Task description:\n"
            f"{self.description}\n"
            "Here is the raw output from the task:\n"
            f"'{task_output.raw}'\n"
            "Use this exact string literal inside your generated code (do not reference variables like task_output.raw). "
            "Now generate Python code that follows the instructions above."
        )

    def generate_code(self, task_output: TaskOutput) -> str:
        """Generates Python code for validating the task output.

        Args:
            task_output (TaskOutput): The output to be validated.
        """
        if self.llm is None:
            raise ValueError("Provide a valid LLM to the TaskGuardrail")

        response = self.llm.call(
            messages=[
                {
                    "role": "system",
                    "content": self.system_instructions,
                },
                {
                    "role": "user",
                    "content": self.user_instructions(task_output=task_output),
                },
            ]
        )
        printer = Printer()
        printer.print(
            content=f"The following code was generated for the guardrail task:\n{response}\n",
            color="cyan",
        )
        return response
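
    # Result-shape sketch (illustrative, not part of the diff): kickoff is
    # expected to return a LiteAgentOutput whose .pydantic attribute holds
    # the parsed TaskGuardrailResult, e.g.:
    # out = guardrail._validate_output(task_output)
    # out.pydantic.valid     -> bool
    # out.pydantic.feedback  -> str | None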
    def __call__(self, task_output: TaskOutput) -> Tuple[bool, Any]:
        """Executes the validation code on the task output.
        """Validates the output of a task based on specified criteria.

        Args:
            task_output (TaskOutput): The output to be validated.
@@ -134,28 +87,16 @@ class TaskGuardrail:
            - bool: True if validation passed, False otherwise
            - Any: The validation result or error message
        """
        import ast

        from crewai_tools import CodeInterpreterTool

        try:
            self.generated_code = self.generate_code(task_output)
            result = CodeInterpreterTool(
                code=self.generated_code, unsafe_mode=self.unsafe_mode
            ).run()

            error_messages = [
                "Something went wrong while running the code",
                "No result variable found",  # when running in unsafe mode, the final output should be stored in the 'result' variable
            ]
            if any(msg in result for msg in error_messages):
                return False, result

            if isinstance(result, str):
                try:
                    result = ast.literal_eval(result)
                except Exception as e:
                    return False, f"Error parsing result: {str(e)}"
            return result
        try:
            result = self._validate_output(task_output)
            assert isinstance(
                result.pydantic, TaskGuardrailResult
            ), "The guardrail result is not a valid pydantic model"

            if result.pydantic.valid:
                return True, task_output.raw
            else:
                return False, result.pydantic.feedback
        except Exception as e:
            return False, f"Error while validating the task output: {str(e)}"

View File

@@ -7,8 +7,8 @@ class TaskGuardrailStartedEvent(BaseEvent):
"""Event emitted when a guardrail task starts
Attributes:
messages: Content can be either a string or a list of dictionaries that support
multimodal content (text, images, etc.)
guardrail: The guardrail callable or TaskGuardrail instance
retry_count: The number of times the guardrail has been retried
"""
type: str = "task_guardrail_started"
@@ -23,8 +23,7 @@ class TaskGuardrailStartedEvent(BaseEvent):
        super().__init__(**data)

        if isinstance(self.guardrail, TaskGuardrail):
            assert self.guardrail.generated_code is not None
            self.guardrail = self.guardrail.generated_code.strip()
            self.guardrail = self.guardrail.description.strip()
        elif isinstance(self.guardrail, Callable):
            self.guardrail = getsource(self.guardrail).strip()
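
To make the normalization concrete, here is a standalone sketch of what the event now stores for each guardrail flavor (a stand-in function, not the crewai implementation):

from inspect import getsource

def summarize_guardrail(guardrail) -> str:
    """Stand-in for the event's normalization logic (illustrative only)."""
    # TaskGuardrail instances expose a human-readable description; the event
    # now stores that instead of generated code, which no longer exists.
    if hasattr(guardrail, "description"):
        return guardrail.description.strip()
    # Plain callables are summarized by their own source code.
    return getsource(guardrail).strip()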