mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-11 17:18:29 +00:00
feat: renaming GuardrailTask to TaskGuardrail
@@ -322,9 +322,9 @@ blog_task = Task(

 - On success: it returns a tuple of `(bool, Any)`. For example: `(True, validated_result)`
 - On failure: it returns a tuple of `(bool, str)`. For example: `(False, "Error message explaining the failure")`

-### GuardrailTask
+### TaskGuardrail

-The `GuardrailTask` class provides a sophisticated way to generate and execute validation code for task outputs. Here's how it works:
+The `TaskGuardrail` class provides a sophisticated way to generate and execute validation code for task outputs. Here's how it works:

 #### Code Execution
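Note (illustrative, not part of this commit): a function-based guardrail following the tuple contract above might look like the sketch below; the function name and the JSON check are assumptions made for the example.

import json

from crewai.tasks.task_output import TaskOutput

def validate_json_output(task_output: TaskOutput):
    # Guardrails return (True, validated_result) on success or (False, "error message") on failure.
    try:
        data = json.loads(task_output.raw)
        return True, data
    except ValueError as e:
        return False, f"Output is not valid JSON: {e}"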
@@ -800,7 +800,7 @@ from crewai.llm import LLM
 task = Task(
     description="Generate JSON data",
     expected_output="Valid JSON object",
-    guardrail=GuardrailTask(
+    guardrail=TaskGuardrail(
         description="Ensure the response is a valid JSON object",
         llm=LLM(model="gpt-4o-mini"),
     )
@@ -480,22 +480,22 @@ class Task(BaseModel):
             raise ValueError("Guardrail is not set")

         from crewai.utilities.events import (
-            GuardrailTaskCompletedEvent,
-            GuardrailTaskStartedEvent,
+            TaskGuardrailCompletedEvent,
+            TaskGuardrailStartedEvent,
         )
         from crewai.utilities.events.crewai_event_bus import crewai_event_bus

         crewai_event_bus.emit(
             self,
-            GuardrailTaskStartedEvent(
+            TaskGuardrailStartedEvent(
                 guardrail=self.guardrail, retry_count=self.retry_count
             ),
         )

         if isinstance(self.guardrail, str):
-            from crewai.tasks.guardrail_task import GuardrailTask
+            from crewai.tasks.task_guardrail import TaskGuardrail

-            result = GuardrailTask(description=self.guardrail, task=self)(task_output)
+            result = TaskGuardrail(description=self.guardrail, task=self)(task_output)
         else:
             result = self.guardrail(task_output)
@@ -503,7 +503,7 @@ class Task(BaseModel):

         crewai_event_bus.emit(
             self,
-            GuardrailTaskCompletedEvent(
+            TaskGuardrailCompletedEvent(
                 success=guardrail_result.success,
                 result=guardrail_result.result,
                 error=guardrail_result.error,
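Note (illustrative, not part of this commit): the `isinstance(self.guardrail, str)` branch above means a plain string can be passed as a guardrail and is wrapped into a `TaskGuardrail` internally, while a callable guardrail is invoked directly. A rough usage sketch, assuming the Task API shown in the docs hunk above:

from crewai import Task

json_task = Task(
    description="Generate JSON data",
    expected_output="Valid JSON object",
    # A string guardrail is wrapped as TaskGuardrail(description=..., task=...) by the code above.
    guardrail="Ensure the response is a valid JSON object",
)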
@@ -1,174 +0,0 @@
from typing import Any, Tuple

from crewai.llm import LLM
from crewai.task import Task
from crewai.tasks.task_output import TaskOutput
from crewai.utilities.printer import Printer


class GuardrailTask:
    """A task that validates the output of another task using generated Python code.

    This class generates and executes Python code to validate task outputs based on
    specified criteria. It uses an LLM to generate the validation code and provides
    safety guardrails for code execution. The code is executed in a Docker container
    if available; otherwise it is executed in the current environment.

    Args:
        description (str): The description of the validation criteria.
        task (Task, optional): The task whose output needs validation.
        llm (LLM, optional): The language model to use for code generation.
        additional_instructions (str, optional): Additional instructions for the guardrail task.
        unsafe_mode (bool, optional): Whether to run the code in unsafe mode.

    Raises:
        ValueError: If no valid LLM is provided.
    """

    def __init__(
        self,
        description: str,
        task: Task | None = None,
        llm: LLM | None = None,
        additional_instructions: str = "",
        unsafe_mode: bool | None = None,
    ):
        self.description = description

        fallback_llm: LLM | None = (
            task.agent.llm
            if task is not None
            and hasattr(task, "agent")
            and task.agent is not None
            and hasattr(task.agent, "llm")
            else None
        )
        self.llm: LLM | None = llm or fallback_llm

        self.additional_instructions = additional_instructions
        self.unsafe_mode = unsafe_mode

    @property
    def system_instructions(self) -> str:
        """System instructions for the LLM code generation.

        Returns:
            str: Complete system instructions including security constraints.
        """
        security_instructions = (
            "- DO NOT wrap the output in markdown or use triple backticks. Return only raw Python code.\n"
            "- DO NOT use `exec`, `eval`, `compile`, `open`, `os`, `subprocess`, `socket`, `shutil`, or any other system-level modules.\n"
            "- Your code must not perform any file I/O, shell access, or dynamic code execution."
        )
        return (
            "You are an expert Python developer. "
            "You **must strictly** follow the task description, use the provided raw output as the input in your code. "
            "Your code must:\n"
            "- Return results with: print((True, data)) on success, or print((False, 'very detailed error message')) on failure. Make sure the final output is being assigned to 'result' variable.\n"
            "- Use the literal string of the task output (already included in your input) if needed.\n"
            "- Generate the code **following strictly** the task description.\n"
            "- Be valid Python 3 — executable as-is.\n"
            f"{security_instructions}\n"
            "Additional instructions (do not override the previous instructions):\n"
            f"{self.additional_instructions}"
        )

    def user_instructions(self, task_output: TaskOutput) -> str:
        """Generates user instructions for the LLM code generation.

        Args:
            task_output (TaskOutput): The output to be validated.

        Returns:
            str: Instructions for generating validation code.
        """
        return (
            "Based on the task description below, generate Python 3 code that validates the task output. \n"
            "Task description:\n"
            f"{self.description}\n"
            "Here is the raw output from the task: \n"
            f"'{task_output.raw}' \n"
            "Use this exact string literal inside your generated code (do not reference variables like task_output.raw). "
            "Now generate Python code that follows the instructions above."
        )

    def generate_code(self, task_output: TaskOutput) -> str:
        """Generates Python code for validating the task output.

        Args:
            task_output (TaskOutput): The output to be validated.

        Returns:
            str: Generated Python code for validation.
        """
        if self.llm is None:
            raise ValueError("Provide a valid LLM to the GuardrailTask")

        response = self.llm.call(
            messages=[
                {
                    "role": "system",
                    "content": self.system_instructions,
                },
                {
                    "role": "user",
                    "content": self.user_instructions(task_output=task_output),
                },
            ]
        )

        printer = Printer()
        printer.print(
            content=f"The following code was generated for the guardrail task:\n{response}\n",
            color="cyan",
        )
        return response

    def __call__(self, task_output: TaskOutput) -> Tuple[bool, Any]:
        """Executes the validation code on the task output.

        Args:
            task_output (TaskOutput): The output to be validated.

        Returns:
            Tuple[bool, Any]: A tuple containing:
                - bool: True if validation passed, False otherwise
                - Any: The validation result or error message
        """
        import ast

        from crewai_tools import CodeInterpreterTool

        code = self.generate_code(task_output)

        unsafe_mode = (
            self.unsafe_mode
            if self.unsafe_mode is not None
            else not self.check_docker_available()
        )

        result = CodeInterpreterTool(code=code, unsafe_mode=unsafe_mode).run()

        error_messages = [
            "Something went wrong while running the code",
            "No result variable found",  # when running in unsafe mode, the final output should be stored in the result variable
        ]

        if any(msg in result for msg in error_messages):
            return False, result

        if isinstance(result, str):
            try:
                result = ast.literal_eval(result)
            except Exception as e:
                return False, f"Error parsing result: {str(e)}"

        return result

    def check_docker_available(self) -> bool:
        import subprocess

        try:
            subprocess.run(["docker", "--version"], check=True)
            return True
        except (subprocess.CalledProcessError, FileNotFoundError):
            return False
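Note (illustrative, not part of this commit): given the system_instructions above, the LLM is asked to emit validation code that assigns the outcome to a `result` variable and prints it as a `(bool, value)` tuple, which `__call__` then parses back with `ast.literal_eval`. A hypothetical example of such generated code for an "ensure the response is valid JSON" guardrail (the literal output value below is made up):

import json

raw_output = '{"name": "Alice", "age": 30}'  # the task output is injected as a string literal by user_instructions

try:
    data = json.loads(raw_output)
    result = (True, data)
except ValueError as e:
    result = (False, f"Invalid JSON: {e}")

print(result)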
@@ -9,9 +9,9 @@ from .crew_events import (
     CrewTestCompletedEvent,
     CrewTestFailedEvent,
 )
-from .guardrail_task_events import (
-    GuardrailTaskCompletedEvent,
-    GuardrailTaskStartedEvent,
+from .task_guardrail_events import (
+    TaskGuardrailCompletedEvent,
+    TaskGuardrailStartedEvent,
 )
 from .agent_events import (
     AgentExecutionStartedEvent,
@@ -23,10 +23,6 @@ from .flow_events import (
     MethodExecutionFinishedEvent,
     MethodExecutionStartedEvent,
 )
-from .guardrail_task_events import (
-    GuardrailTaskCompletedEvent,
-    GuardrailTaskStartedEvent,
-)
 from .llm_events import (
     LLMCallCompletedEvent,
     LLMCallFailedEvent,
@@ -38,6 +34,10 @@ from .task_events import (
     TaskFailedEvent,
     TaskStartedEvent,
 )
+from .task_guardrail_events import (
+    TaskGuardrailCompletedEvent,
+    TaskGuardrailStartedEvent,
+)
 from .tool_usage_events import (
     ToolUsageErrorEvent,
     ToolUsageFinishedEvent,
@@ -72,6 +72,6 @@ EventTypes = Union[
     LLMCallCompletedEvent,
     LLMCallFailedEvent,
     LLMStreamChunkEvent,
-    GuardrailTaskStartedEvent,
-    GuardrailTaskCompletedEvent,
+    TaskGuardrailStartedEvent,
+    TaskGuardrailCompletedEvent,
 ]
@@ -1,28 +0,0 @@
from typing import Any, Callable, Optional, Union

from pydantic import BaseModel

from crewai.utilities.events.base_events import BaseEvent


class GuardrailTaskStartedEvent(BaseEvent):
    """Event emitted when a guardrail task starts.

    Attributes:
        guardrail: The guardrail being applied, either a string description or a callable.
        retry_count: The number of retries attempted so far.
    """

    type: str = "guardrail_task_started"
    guardrail: Union[str, Callable]
    retry_count: int


class GuardrailTaskCompletedEvent(BaseEvent):
    """Event emitted when a guardrail task completes"""

    type: str = "guardrail_task_completed"
    success: bool
    result: Any
    error: Optional[str] = None
    retry_count: int
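Note (illustrative, not part of this commit): assuming the renamed events in task_guardrail_events.py keep the same fields as the removed classes above, they can be observed through the event bus with the same decorator pattern the tests below use; the handler names here are made up for the example.

from crewai.utilities.events import (
    TaskGuardrailCompletedEvent,
    TaskGuardrailStartedEvent,
)
from crewai.utilities.events.crewai_event_bus import crewai_event_bus


@crewai_event_bus.on(TaskGuardrailStartedEvent)
def log_guardrail_started(source, event):
    # Fields mirror the removed GuardrailTaskStartedEvent: guardrail and retry_count.
    print(f"Guardrail started (retry {event.retry_count}): {event.guardrail}")


@crewai_event_bus.on(TaskGuardrailCompletedEvent)
def log_guardrail_completed(source, event):
    # Fields mirror the removed GuardrailTaskCompletedEvent: success, result, error, retry_count.
    print(f"Guardrail completed: success={event.success}, error={event.error}")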
@@ -4,11 +4,11 @@ import pytest

 from crewai import Agent, Task
 from crewai.llm import LLM
-from crewai.tasks.guardrail_task import GuardrailTask
+from crewai.tasks.task_guardrail import TaskGuardrail
 from crewai.tasks.task_output import TaskOutput
 from crewai.utilities.events import (
-    GuardrailTaskCompletedEvent,
-    GuardrailTaskStartedEvent,
+    TaskGuardrailCompletedEvent,
+    TaskGuardrailStartedEvent,
 )
 from crewai.utilities.events.crewai_event_bus import crewai_event_bus
@@ -148,20 +148,20 @@ def test_guardrail_using_llm(sample_agent):
     )

     with patch(
-        "crewai.tasks.guardrail_task.GuardrailTask.__call__",
+        "crewai.tasks.task_guardrail.TaskGuardrail.__call__",
         side_effect=[(False, "bad result"), (True, "good result")],
     ) as mock_guardrail:
         task.execute_sync(agent=sample_agent)

     assert mock_guardrail.call_count == 2

-    task.guardrail = GuardrailTask(
+    task.guardrail = TaskGuardrail(
         description="Ensure the output is equal to 'good result'",
         llm=LLM(model="gpt-4o-mini"),
     )

     with patch(
-        "crewai.tasks.guardrail_task.GuardrailTask.__call__",
+        "crewai.tasks.task_guardrail.TaskGuardrail.__call__",
         side_effect=[(False, "bad result"), (True, "good result")],
     ) as mock_guardrail:
         task.execute_sync(agent=sample_agent)
@@ -179,10 +179,10 @@ def task_output():
     )


-def test_guardrail_task_initialization_no_llm(task_output):
-    """Test GuardrailTask initialization fails without LLM"""
-    with pytest.raises(ValueError, match="Provide a valid LLM to the GuardrailTask"):
-        GuardrailTask(description="Test")(task_output)
+def test_task_guardrail_initialization_no_llm(task_output):
+    """Test TaskGuardrail initialization fails without LLM"""
+    with pytest.raises(ValueError, match="Provide a valid LLM to the TaskGuardrail"):
+        TaskGuardrail(description="Test")(task_output)


 @pytest.fixture
@@ -235,10 +235,10 @@ print(result)
     ],
 )
 @patch("crewai_tools.CodeInterpreterTool.run")
-def test_guardrail_task_execute_code(mock_run, mock_llm, tool_run_output, task_output):
+def test_task_guardrail_execute_code(mock_run, mock_llm, tool_run_output, task_output):
     mock_run.return_value = tool_run_output["output"]

-    guardrail = GuardrailTask(description="Test validation", llm=mock_llm)
+    guardrail = TaskGuardrail(description="Test validation", llm=mock_llm)

     result = guardrail(task_output)
     assert result[0] == tool_run_output["expected_result"]
@@ -251,7 +251,7 @@ def test_guardrail_using_additional_instructions(mock_run, mock_llm, task_output
     additional_instructions = (
         "This is an additional instruction created by the user follow it strictly"
     )
-    guardrail = GuardrailTask(
+    guardrail = TaskGuardrail(
         description="Test validation",
         llm=mock_llm,
         additional_instructions=additional_instructions,
@@ -269,13 +269,13 @@ def test_guardrail_emits_events(sample_agent):

     with crewai_event_bus.scoped_handlers():

-        @crewai_event_bus.on(GuardrailTaskStartedEvent)
+        @crewai_event_bus.on(TaskGuardrailStartedEvent)
         def handle_guardrail_started(source, event):
             started_guardrail.append(
                 {"guardrail": event.guardrail, "retry_count": event.retry_count}
             )

-        @crewai_event_bus.on(GuardrailTaskCompletedEvent)
+        @crewai_event_bus.on(TaskGuardrailCompletedEvent)
         def handle_guardrail_completed(source, event):
             completed_guardrail.append(
                 {
@@ -293,7 +293,7 @@ def test_guardrail_emits_events(sample_agent):
     )

     with patch(
-        "crewai.tasks.guardrail_task.GuardrailTask.__call__",
+        "crewai.tasks.task_guardrail.TaskGuardrail.__call__",
         side_effect=[(False, "bad result"), (True, "good result")],
     ):
         task.execute_sync(agent=sample_agent)
@@ -326,8 +326,8 @@ def test_guardrail_emits_events(sample_agent):
     assert completed_guardrail == expected_completed_events


-def test_guardrail_task_when_docker_is_not_available(mock_llm, task_output):
-    guardrail = GuardrailTask(description="Test validation", llm=mock_llm)
+def test_task_guardrail_when_docker_is_not_available(mock_llm, task_output):
+    guardrail = TaskGuardrail(description="Test validation", llm=mock_llm)
     with (
         patch(
             "crewai_tools.CodeInterpreterTool.__init__", return_value=None
@@ -345,8 +345,8 @@ def test_guardrail_task_when_docker_is_not_available(mock_llm, task_output):
     mock_init.assert_called_once_with(code=ANY, unsafe_mode=True)


-def test_guardrail_task_when_docker_is_available(mock_llm, task_output):
-    guardrail = GuardrailTask(description="Test validation", llm=mock_llm)
+def test_task_guardrail_when_docker_is_available(mock_llm, task_output):
+    guardrail = TaskGuardrail(description="Test validation", llm=mock_llm)
     with (
         patch(
             "crewai_tools.CodeInterpreterTool.__init__", return_value=None
@@ -364,8 +364,8 @@ def test_guardrail_task_when_docker_is_available(mock_llm, task_output):
     mock_init.assert_called_once_with(code=ANY, unsafe_mode=False)


-def test_guardrail_task_when_tool_output_is_not_valid(mock_llm, task_output):
-    guardrail = GuardrailTask(description="Test validation", llm=mock_llm)
+def test_task_guardrail_when_tool_output_is_not_valid(mock_llm, task_output):
+    guardrail = TaskGuardrail(description="Test validation", llm=mock_llm)
     with (
         patch(
             "crewai_tools.CodeInterpreterTool.__init__", return_value=None
@@ -385,8 +385,8 @@ def test_guardrail_task_when_tool_output_is_not_valid(mock_llm, task_output):


 @pytest.mark.parametrize("unsafe_mode", [True, False])
-def test_guardrail_task_force_code_tool_unsafe_mode(mock_llm, task_output, unsafe_mode):
-    guardrail = GuardrailTask(
+def test_task_guardrail_force_code_tool_unsafe_mode(mock_llm, task_output, unsafe_mode):
+    guardrail = TaskGuardrail(
         description="Test validation", llm=mock_llm, unsafe_mode=unsafe_mode
     )
     with (