refactor: simplify TaskGuardrail to use LLM for validation, no code generation

This commit is contained in:
Lucas Gomide
2025-04-29 09:50:03 -03:00
parent e3ab80f517
commit e940ff3cbd
14 changed files with 3883 additions and 4217 deletions
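
In short, the guardrail no longer asks an LLM to write Python that is then executed; it asks an LLM (via a small Agent) to judge the task output directly and return a structured verdict. A minimal usage sketch of the new flow follows; the model name and TaskOutput wiring are assumptions, not from this diff, and TaskGuardrail's own import path is hidden behind the View File control:

from crewai.llm import LLM
from crewai.tasks.task_output import TaskOutput

guardrail = TaskGuardrail(
    description="The summary must be under 100 words and mention pricing",
    llm=LLM(model="gpt-4o-mini"),  # assumed model; any crewai-supported LLM works
)
output = TaskOutput(description="Summarize the plan", raw="Our pricing starts at $10...", agent="writer")
ok, payload = guardrail(output)  # (True, raw output) or (False, feedback string)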

View File

@@ -1,39 +1,40 @@
from typing import Any, Tuple
from typing import Any, Optional, Tuple
from pydantic import BaseModel, Field
from crewai.agent import Agent, LiteAgentOutput
from crewai.llm import LLM
from crewai.task import Task
from crewai.tasks.task_output import TaskOutput
from crewai.utilities.printer import Printer
class TaskGuardrailResult(BaseModel):
    valid: bool = Field(
        description="Whether the task output complies with the guardrail"
    )
    feedback: str | None = Field(
        description="Feedback about the task output if it is not valid",
        default=None,
    )
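
# Illustrative sketch (not part of the diff): the structured verdict the
# guardrail agent returns. The field values below are made up for illustration.
example_fail = TaskGuardrailResult(valid=False, feedback="Total is 120, above the allowed 100")
example_pass = TaskGuardrailResult(valid=True)  # feedback defaults to None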
class TaskGuardrail:
    """A task that validates the output of another task using generated Python code.

    This class generates and executes Python code to validate task outputs based on
    specified criteria. It uses an LLM to generate the validation code and provides
    safety guardrails for code execution.
    The code is executed in a Docker container if available; otherwise it is executed in a sandboxed environment.
    If unsafe mode is enabled, the code is executed in the current environment.
    This class is used to validate the output from a Task based on specified criteria.
    It uses an LLM to validate the output and provides feedback if the output is not valid.

    Args:
        description (str): The description of the validation criteria.
        task (Task, optional): The task whose output needs validation.
        llm (LLM, optional): The language model to use for code generation.
        additional_instructions (str, optional): Additional instructions for the guardrail task.
        unsafe_mode (bool, optional): Whether to run the code in unsafe mode.

    Raises:
        ValueError: If no valid LLM is provided.
    """
    generated_code: str = ""

    def __init__(
        self,
        description: str,
        task: Task | None = None,
        llm: LLM | None = None,
        additional_instructions: str = "",
        unsafe_mode: bool = False,
    ):
        self.description = description
@@ -47,84 +48,36 @@ class TaskGuardrail:
        )
        self.llm: LLM | None = llm or fallback_llm
        self.additional_instructions = additional_instructions
        self.unsafe_mode = unsafe_mode
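
    # Construction sketch (illustrative, not part of the diff; the model name
    # is an assumption):
    # guardrail = TaskGuardrail(
    #     description="The answer must be valid JSON with a 'total' field under 100",
    #     llm=LLM(model="gpt-4o-mini"),
    # )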
    def _validate_output(self, task_output: TaskOutput) -> LiteAgentOutput:
        agent = Agent(
            role="Guardrail Agent",
            goal="Validate the output of the task",
            backstory="You are an expert at validating the output of a task, providing effective feedback if the output is not valid.",
            llm=self.llm,
        )

        query = f"""
        Ensure the following task result complies with the given guardrail.

        Task result:
        {task_output.raw}

        Guardrail:
        {self.description}

        Your task:
        - Confirm if the Task result complies with the guardrail.
        - If not, provide clear feedback explaining what is wrong (e.g., by how much it violates the rule, or what specific part fails).
        - Focus only on identifying issues — do not propose corrections.
        - If the Task result complies with the guardrail, say that it is valid.
        """

        result = agent.kickoff(query, response_format=TaskGuardrailResult)
        return result

    @property
    def system_instructions(self) -> str:
        """System instructions for the LLM code generation.

        Returns:
            str: Complete system instructions including security constraints.
        """
        security_instructions = (
            "- DO NOT wrap the output in markdown or use triple backticks. Return only raw Python code.\n"
            "- DO NOT use `exec`, `eval`, `compile`, `open`, `os`, `subprocess`, `socket`, `shutil`, or any other system-level modules.\n"
            "- Your code must not perform any file I/O, shell access, or dynamic code execution."
        )
        return (
            "You are an expert Python developer. "
            "You **must strictly** follow the task description and use the provided raw output as the input in your code. "
            "Your code must:\n"
            "- Return results with: print((True, data)) on success, or print((False, 'very detailed error message')) on failure. Make sure the final output is assigned to the 'result' variable.\n"
            "- Use the literal string of the task output (already included in your input) if needed.\n"
            "- Generate the code **following strictly** the task description.\n"
            "- Be valid Python 3 — executable as-is.\n"
            f"{security_instructions}\n"
            "Additional instructions (do not override the previous instructions):\n"
            f"{self.additional_instructions}"
        )

    def user_instructions(self, task_output: TaskOutput) -> str:
        """Generates user instructions for the LLM code generation.

        Args:
            task_output (TaskOutput): The output to be validated.

        Returns:
            str: Instructions for generating validation code.
        """
        return (
            "Based on the task description below, generate Python 3 code that validates the task output.\n"
            "Task description:\n"
            f"{self.description}\n"
            "Here is the raw output from the task:\n"
            f"'{task_output.raw}'\n"
            "Use this exact string literal inside your generated code (do not reference variables like task_output.raw). "
            "Now generate Python code that follows the instructions above."
        )

    def generate_code(self, task_output: TaskOutput) -> str:
        """Generates Python code for validating the task output.

        Args:
            task_output (TaskOutput): The output to be validated.
        """
        if self.llm is None:
            raise ValueError("Provide a valid LLM to the TaskGuardrail")

        response = self.llm.call(
            messages=[
                {
                    "role": "system",
                    "content": self.system_instructions,
                },
                {
                    "role": "user",
                    "content": self.user_instructions(task_output=task_output),
                },
            ]
        )
        printer = Printer()
        printer.print(
            content=f"The following code was generated for the guardrail task:\n{response}\n",
            color="cyan",
        )
        return response
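
    # Result-shape sketch (illustrative, not part of the diff): kickoff is
    # expected to return a LiteAgentOutput whose .pydantic attribute holds
    # the parsed TaskGuardrailResult, e.g.:
    # out = guardrail._validate_output(task_output)
    # out.pydantic.valid     -> bool
    # out.pydantic.feedback  -> str | None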
    def __call__(self, task_output: TaskOutput) -> Tuple[bool, Any]:
        """Executes the validation code on the task output.
        """Validates the output of a task based on specified criteria.

        Args:
            task_output (TaskOutput): The output to be validated.
@@ -134,28 +87,16 @@ class TaskGuardrail:
            - bool: True if validation passed, False otherwise
            - Any: The validation result or error message
        """
        import ast

        from crewai_tools import CodeInterpreterTool

        try:
            self.generated_code = self.generate_code(task_output)
            result = CodeInterpreterTool(
                code=self.generated_code, unsafe_mode=self.unsafe_mode
            ).run()

            error_messages = [
                "Something went wrong while running the code",
                "No result variable found",  # when running in unsafe mode, the final output should be stored in the 'result' variable
            ]
            if any(msg in result for msg in error_messages):
                return False, result

            if isinstance(result, str):
                try:
                    result = ast.literal_eval(result)
                except Exception as e:
                    return False, f"Error parsing result: {str(e)}"
            return result
        try:
            result = self._validate_output(task_output)
            assert isinstance(
                result.pydantic, TaskGuardrailResult
            ), "The guardrail result is not a valid pydantic model"

            if result.pydantic.valid:
                return True, task_output.raw
            else:
                return False, result.pydantic.feedback
        except Exception as e:
            return False, f"Error while validating the task output: {str(e)}"

View File

@@ -7,8 +7,8 @@ class TaskGuardrailStartedEvent(BaseEvent):
"""Event emitted when a guardrail task starts
Attributes:
messages: Content can be either a string or a list of dictionaries that support
multimodal content (text, images, etc.)
guardrail: The guardrail callable or TaskGuardrail instance
retry_count: The number of times the guardrail has been retried
"""
type: str = "task_guardrail_started"
@@ -23,8 +23,7 @@ class TaskGuardrailStartedEvent(BaseEvent):
        super().__init__(**data)

        if isinstance(self.guardrail, TaskGuardrail):
            assert self.guardrail.generated_code is not None
            self.guardrail = self.guardrail.generated_code.strip()
            self.guardrail = self.guardrail.description.strip()
        elif isinstance(self.guardrail, Callable):
            self.guardrail = getsource(self.guardrail).strip()
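
To make the normalization concrete, here is a standalone sketch of what the event now stores for each guardrail flavor (a stand-in function, not the crewai implementation):

from inspect import getsource

def summarize_guardrail(guardrail) -> str:
    """Stand-in for the event's normalization logic (illustrative only)."""
    # TaskGuardrail instances expose a human-readable description; the event
    # now stores that instead of generated code, which no longer exists.
    if hasattr(guardrail, "description"):
        return guardrail.description.strip()
    # Plain callables are summarized by their own source code.
    return getsource(guardrail).strip()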