From 05e99bdfe5f832d5e4ca4a28e03b8ab8bef581ab Mon Sep 17 00:00:00 2001
From: Lucas Gomide
Date: Wed, 23 Apr 2025 14:37:46 -0300
Subject: [PATCH] feat: rename GuardrailTask to TaskGuardrail

---
 docs/concepts/tasks.mdx | 6 +-
 src/crewai/task.py | 12 +-
 src/crewai/tasks/guardrail_task.py | 174 ------------------
 src/crewai/utilities/events/__init__.py | 6 +-
 src/crewai/utilities/events/event_types.py | 12 +-
 .../utilities/events/guardrail_task_events.py | 28 ---
 tests/test_task_guardrails.py | 48 ++---
 7 files changed, 42 insertions(+), 244 deletions(-)
 delete mode 100644 src/crewai/tasks/guardrail_task.py
 delete mode 100644 src/crewai/utilities/events/guardrail_task_events.py

diff --git a/docs/concepts/tasks.mdx b/docs/concepts/tasks.mdx
index 956c0cff7..3e5dfcc2e 100644
--- a/docs/concepts/tasks.mdx
+++ b/docs/concepts/tasks.mdx
@@ -322,9 +322,9 @@ blog_task = Task(
 - On success: it returns a tuple of `(bool, Any)`. For example: `(True, validated_result)`
 - On failure: it returns a tuple of `(bool, str)`. For example: `(False, "Error message explaining the failure")`
 
-### GuardrailTask
+### TaskGuardrail
 
-The `GuardrailTask` class provides a sophisticated way to generate and execute validation code for task outputs. Here's how it works:
+The `TaskGuardrail` class provides a sophisticated way to generate and execute validation code for task outputs. Here's how it works:
 
 #### Code Execution
 
@@ -800,7 +800,7 @@ from crewai.llm import LLM
 task = Task(
     description="Generate JSON data",
     expected_output="Valid JSON object",
-    guardrail=GuardrailTask(
+    guardrail=TaskGuardrail(
         description="Ensure the response is a valid JSON object",
         llm=LLM(model="gpt-4o-mini"),
     )
diff --git a/src/crewai/task.py b/src/crewai/task.py
index 0aa62abb4..5ca33fb9e 100644
--- a/src/crewai/task.py
+++ b/src/crewai/task.py
@@ -480,22 +480,22 @@ class Task(BaseModel):
             raise ValueError("Guardrail is not set")
 
         from crewai.utilities.events import (
-            GuardrailTaskCompletedEvent,
-            GuardrailTaskStartedEvent,
+            TaskGuardrailCompletedEvent,
+            TaskGuardrailStartedEvent,
         )
         from crewai.utilities.events.crewai_event_bus import crewai_event_bus
 
         crewai_event_bus.emit(
             self,
-            GuardrailTaskStartedEvent(
+            TaskGuardrailStartedEvent(
                 guardrail=self.guardrail, retry_count=self.retry_count
             ),
         )
 
         if isinstance(self.guardrail, str):
-            from crewai.tasks.guardrail_task import GuardrailTask
+            from crewai.tasks.task_guardrail import TaskGuardrail
 
-            result = GuardrailTask(description=self.guardrail, task=self)(task_output)
+            result = TaskGuardrail(description=self.guardrail, task=self)(task_output)
         else:
             result = self.guardrail(task_output)
 
@@ -503,7 +503,7 @@ class Task(BaseModel):
 
         crewai_event_bus.emit(
             self,
-            GuardrailTaskCompletedEvent(
+            TaskGuardrailCompletedEvent(
                 success=guardrail_result.success,
                 result=guardrail_result.result,
                 error=guardrail_result.error,
diff --git a/src/crewai/tasks/guardrail_task.py b/src/crewai/tasks/guardrail_task.py
deleted file mode 100644
index d46a1d5e1..000000000
--- a/src/crewai/tasks/guardrail_task.py
+++ /dev/null
@@ -1,174 +0,0 @@
-from typing import Any, Tuple
-
-from crewai.llm import LLM
-from crewai.task import Task
-from crewai.tasks.task_output import TaskOutput
-from crewai.utilities.printer import Printer
-
-
-class GuardrailTask:
-    """A task that validates the output of another task using generated Python code.
-
-    This class generates and executes Python code to validate task outputs based on
-    specified criteria. It uses an LLM to generate the validation code and provides
-    safety guardrails for code execution. The code is executed in a Docker container
-    if available; otherwise it is executed in the current environment.
-
-    Args:
-        description (str): The description of the validation criteria.
-        task (Task, optional): The task whose output needs validation.
-        llm (LLM, optional): The language model to use for code generation.
-        additional_instructions (str, optional): Additional instructions for the guardrail task.
-        unsafe_mode (bool, optional): Whether to run the code in unsafe mode.
-
-    Raises:
-        ValueError: If no valid LLM is provided.
-    """
-
-    def __init__(
-        self,
-        description: str,
-        task: Task | None = None,
-        llm: LLM | None = None,
-        additional_instructions: str = "",
-        unsafe_mode: bool | None = None,
-    ):
-        self.description = description
-
-        fallback_llm: LLM | None = (
-            task.agent.llm
-            if task is not None
-            and hasattr(task, "agent")
-            and task.agent is not None
-            and hasattr(task.agent, "llm")
-            else None
-        )
-        self.llm: LLM | None = llm or fallback_llm
-
-        self.additional_instructions = additional_instructions
-        self.unsafe_mode = unsafe_mode
-
-    @property
-    def system_instructions(self) -> str:
-        """System instructions for the LLM code generation.
-
-        Returns:
-            str: Complete system instructions including security constraints.
-        """
-        security_instructions = (
-            "- DO NOT wrap the output in markdown or use triple backticks. Return only raw Python code.\n"
-            "- DO NOT use `exec`, `eval`, `compile`, `open`, `os`, `subprocess`, `socket`, `shutil`, or any other system-level modules.\n"
-            "- Your code must not perform any file I/O, shell access, or dynamic code execution."
-        )
-        return (
-            "You are an expert Python developer. "
-            "You **must strictly** follow the task description and use the provided raw output as the input in your code. "
-            "Your code must:\n"
-            "- Return results with: print((True, data)) on success, or print((False, 'very detailed error message')) on failure. Make sure the final output is assigned to the 'result' variable.\n"
-            "- Use the literal string of the task output (already included in your input) if needed.\n"
-            "- Generate the code **following strictly** the task description.\n"
-            "- Be valid Python 3 — executable as-is.\n"
-            f"{security_instructions}\n"
-            "Additional instructions (do not override the previous instructions):\n"
-            f"{self.additional_instructions}"
-        )
-
-    def user_instructions(self, task_output: TaskOutput) -> str:
-        """Generates user instructions for the LLM code generation.
-
-        Args:
-            task_output (TaskOutput): The output to be validated.
-
-        Returns:
-            str: Instructions for generating validation code.
-        """
-        return (
-            "Based on the task description below, generate Python 3 code that validates the task output.\n"
-            "Task description:\n"
-            f"{self.description}\n"
-            "Here is the raw output from the task:\n"
-            f"'{task_output.raw}'\n"
-            "Use this exact string literal inside your generated code (do not reference variables like task_output.raw).\n"
-            "Now generate Python code that follows the instructions above."
-        )
-
-    def generate_code(self, task_output: TaskOutput) -> str:
-        """Generates Python code for validating the task output.
-
-        Args:
-            task_output (TaskOutput): The output to be validated.
-
-        Returns:
-            str: Generated Python code for validation.
-        """
-        if self.llm is None:
-            raise ValueError("Provide a valid LLM to the GuardrailTask")
-
-        response = self.llm.call(
-            messages=[
-                {
-                    "role": "system",
-                    "content": self.system_instructions,
-                },
-                {
-                    "role": "user",
-                    "content": self.user_instructions(task_output=task_output),
-                },
-            ]
-        )
-
-        printer = Printer()
-        printer.print(
-            content=f"The following code was generated for the guardrail task:\n{response}\n",
-            color="cyan",
-        )
-        return response
-
-    def __call__(self, task_output: TaskOutput) -> Tuple[bool, Any]:
-        """Executes the validation code on the task output.
-
-        Args:
-            task_output (TaskOutput): The output to be validated.
-
-        Returns:
-            Tuple[bool, Any]: A tuple containing:
-                - bool: True if validation passed, False otherwise
-                - Any: The validation result or error message
-        """
-        import ast
-
-        from crewai_tools import CodeInterpreterTool
-
-        code = self.generate_code(task_output)
-
-        unsafe_mode = (
-            self.unsafe_mode
-            if self.unsafe_mode is not None
-            else not self.check_docker_available()
-        )
-
-        result = CodeInterpreterTool(code=code, unsafe_mode=unsafe_mode).run()
-
-        error_messages = [
-            "Something went wrong while running the code",
-            "No result variable found",  # when running in unsafe mode, the final output should be stored in the 'result' variable
-        ]
-
-        if any(msg in result for msg in error_messages):
-            return False, result
-
-        if isinstance(result, str):
-            try:
-                result = ast.literal_eval(result)
-            except Exception as e:
-                return False, f"Error parsing result: {str(e)}"
-
-        return result
-
-    def check_docker_available(self) -> bool:
-        import subprocess
-
-        try:
-            subprocess.run(["docker", "--version"], check=True)
-            return True
-        except (subprocess.CalledProcessError, FileNotFoundError):
-            return False
diff --git a/src/crewai/utilities/events/__init__.py b/src/crewai/utilities/events/__init__.py
index 8fe14e8cb..b87f3d9fa 100644
--- a/src/crewai/utilities/events/__init__.py
+++ b/src/crewai/utilities/events/__init__.py
@@ -9,9 +9,9 @@ from .crew_events import (
     CrewTestCompletedEvent,
     CrewTestFailedEvent,
 )
-from .guardrail_task_events import (
-    GuardrailTaskCompletedEvent,
-    GuardrailTaskStartedEvent,
+from .task_guardrail_events import (
+    TaskGuardrailCompletedEvent,
+    TaskGuardrailStartedEvent,
 )
 from .agent_events import (
     AgentExecutionStartedEvent,
diff --git a/src/crewai/utilities/events/event_types.py b/src/crewai/utilities/events/event_types.py
index f96cf564b..9f5a5da58 100644
--- a/src/crewai/utilities/events/event_types.py
+++ b/src/crewai/utilities/events/event_types.py
@@ -23,10 +23,6 @@ from .flow_events import (
     MethodExecutionFinishedEvent,
     MethodExecutionStartedEvent,
 )
-from .guardrail_task_events import (
-    GuardrailTaskCompletedEvent,
-    GuardrailTaskStartedEvent,
-)
 from .llm_events import (
     LLMCallCompletedEvent,
     LLMCallFailedEvent,
@@ -38,6 +34,10 @@ from .task_events import (
     TaskFailedEvent,
     TaskStartedEvent,
 )
+from .task_guardrail_events import (
+    TaskGuardrailCompletedEvent,
+    TaskGuardrailStartedEvent,
+)
 from .tool_usage_events import (
     ToolUsageErrorEvent,
     ToolUsageFinishedEvent,
@@ -72,6 +72,6 @@ EventTypes = Union[
     LLMCallCompletedEvent,
     LLMCallFailedEvent,
     LLMStreamChunkEvent,
-    GuardrailTaskStartedEvent,
-    GuardrailTaskCompletedEvent,
+    TaskGuardrailStartedEvent,
+    TaskGuardrailCompletedEvent,
 ]
diff --git a/src/crewai/utilities/events/guardrail_task_events.py b/src/crewai/utilities/events/guardrail_task_events.py
deleted file mode 100644
index 90f48a256..000000000
--- a/src/crewai/utilities/events/guardrail_task_events.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from typing import Any, Callable, Optional, Union
-
-from pydantic import BaseModel
-
-from crewai.utilities.events.base_events import BaseEvent
-
-
-class GuardrailTaskStartedEvent(BaseEvent):
-    """Event emitted when a guardrail task starts.
-
-    Attributes:
-        guardrail: The guardrail to run, either a string description or a callable.
-        retry_count: The number of retries attempted so far.
-    """
-
-    type: str = "guardrail_task_started"
-    guardrail: Union[str, Callable]
-    retry_count: int
-
-
-class GuardrailTaskCompletedEvent(BaseEvent):
-    """Event emitted when a guardrail task completes."""
-
-    type: str = "guardrail_task_completed"
-    success: bool
-    result: Any
-    error: Optional[str] = None
-    retry_count: int
diff --git a/tests/test_task_guardrails.py b/tests/test_task_guardrails.py
index c17503199..3e92a2f6a 100644
--- a/tests/test_task_guardrails.py
+++ b/tests/test_task_guardrails.py
@@ -4,11 +4,11 @@ import pytest
 
 from crewai import Agent, Task
 from crewai.llm import LLM
-from crewai.tasks.guardrail_task import GuardrailTask
+from crewai.tasks.task_guardrail import TaskGuardrail
 from crewai.tasks.task_output import TaskOutput
 from crewai.utilities.events import (
-    GuardrailTaskCompletedEvent,
-    GuardrailTaskStartedEvent,
+    TaskGuardrailCompletedEvent,
+    TaskGuardrailStartedEvent,
 )
 from crewai.utilities.events.crewai_event_bus import crewai_event_bus
 
@@ -148,20 +148,20 @@ def test_guardrail_using_llm(sample_agent):
     )
 
     with patch(
-        "crewai.tasks.guardrail_task.GuardrailTask.__call__",
+        "crewai.tasks.task_guardrail.TaskGuardrail.__call__",
         side_effect=[(False, "bad result"), (True, "good result")],
     ) as mock_guardrail:
         task.execute_sync(agent=sample_agent)
 
         assert mock_guardrail.call_count == 2
 
-    task.guardrail = GuardrailTask(
+    task.guardrail = TaskGuardrail(
         description="Ensure the output is equal to 'good result'",
         llm=LLM(model="gpt-4o-mini"),
    )
 
     with patch(
-        "crewai.tasks.guardrail_task.GuardrailTask.__call__",
+        "crewai.tasks.task_guardrail.TaskGuardrail.__call__",
         side_effect=[(False, "bad result"), (True, "good result")],
     ) as mock_guardrail:
         task.execute_sync(agent=sample_agent)
@@ -179,10 +179,10 @@ def task_output():
     )
 
 
-def test_guardrail_task_initialization_no_llm(task_output):
-    """Test GuardrailTask initialization fails without an LLM"""
-    with pytest.raises(ValueError, match="Provide a valid LLM to the GuardrailTask"):
-        GuardrailTask(description="Test")(task_output)
+def test_task_guardrail_initialization_no_llm(task_output):
+    """Test TaskGuardrail initialization fails without an LLM"""
+    with pytest.raises(ValueError, match="Provide a valid LLM to the TaskGuardrail"):
+        TaskGuardrail(description="Test")(task_output)
 
 
 @pytest.fixture

print(result)
     ],
 )
 @patch("crewai_tools.CodeInterpreterTool.run")
-def test_guardrail_task_execute_code(mock_run, mock_llm, tool_run_output, task_output):
+def test_task_guardrail_execute_code(mock_run, mock_llm, tool_run_output, task_output):
     mock_run.return_value = tool_run_output["output"]
 
-    guardrail = GuardrailTask(description="Test validation", llm=mock_llm)
+    guardrail = TaskGuardrail(description="Test validation", llm=mock_llm)
     result = guardrail(task_output)
 
     assert result[0] == tool_run_output["expected_result"]
@@ -251,7 +251,7 @@ def test_guardrail_using_additional_instructions(mock_run, mock_llm, task_output
     additional_instructions = (
         "This is an additional instruction created by the user; follow it strictly"
     )
-    guardrail = GuardrailTask(
+    guardrail = TaskGuardrail(
         description="Test validation",
         llm=mock_llm,
         additional_instructions=additional_instructions,
     )
@@ -269,13 +269,13 @@ def test_guardrail_emits_events(sample_agent):
 
     with crewai_event_bus.scoped_handlers():
 
-        @crewai_event_bus.on(GuardrailTaskStartedEvent)
+        @crewai_event_bus.on(TaskGuardrailStartedEvent)
         def handle_guardrail_started(source, event):
             started_guardrail.append(
                 {"guardrail": event.guardrail, "retry_count": event.retry_count}
             )
 
-        @crewai_event_bus.on(GuardrailTaskCompletedEvent)
+        @crewai_event_bus.on(TaskGuardrailCompletedEvent)
         def handle_guardrail_completed(source, event):
             completed_guardrail.append(
                 {
@@ -293,7 +293,7 @@ def test_guardrail_emits_events(sample_agent):
     )
 
     with patch(
-        "crewai.tasks.guardrail_task.GuardrailTask.__call__",
+        "crewai.tasks.task_guardrail.TaskGuardrail.__call__",
         side_effect=[(False, "bad result"), (True, "good result")],
     ):
         task.execute_sync(agent=sample_agent)
@@ -326,8 +326,8 @@ def test_guardrail_emits_events(sample_agent):
     assert completed_guardrail == expected_completed_events
 
 
-def test_guardrail_task_when_docker_is_not_available(mock_llm, task_output):
-    guardrail = GuardrailTask(description="Test validation", llm=mock_llm)
+def test_task_guardrail_when_docker_is_not_available(mock_llm, task_output):
+    guardrail = TaskGuardrail(description="Test validation", llm=mock_llm)
     with (
         patch(
             "crewai_tools.CodeInterpreterTool.__init__", return_value=None
         ) as mock_init,

     mock_init.assert_called_once_with(code=ANY, unsafe_mode=True)


-def test_guardrail_task_when_docker_is_available(mock_llm, task_output):
-    guardrail = GuardrailTask(description="Test validation", llm=mock_llm)
+def test_task_guardrail_when_docker_is_available(mock_llm, task_output):
+    guardrail = TaskGuardrail(description="Test validation", llm=mock_llm)
     with (
         patch(
             "crewai_tools.CodeInterpreterTool.__init__", return_value=None
         ) as mock_init,

     mock_init.assert_called_once_with(code=ANY, unsafe_mode=False)


-def test_guardrail_task_when_tool_output_is_not_valid(mock_llm, task_output):
-    guardrail = GuardrailTask(description="Test validation", llm=mock_llm)
+def test_task_guardrail_when_tool_output_is_not_valid(mock_llm, task_output):
+    guardrail = TaskGuardrail(description="Test validation", llm=mock_llm)
     with (
        patch(
             "crewai_tools.CodeInterpreterTool.__init__", return_value=None
         ) as mock_init,


 @pytest.mark.parametrize("unsafe_mode", [True, False])
-def test_guardrail_task_force_code_tool_unsafe_mode(mock_llm, task_output, unsafe_mode):
-    guardrail = GuardrailTask(
+def test_task_guardrail_force_code_tool_unsafe_mode(mock_llm, task_output, unsafe_mode):
+    guardrail = TaskGuardrail(
         description="Test validation", llm=mock_llm, unsafe_mode=unsafe_mode
     )
     with (
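
For reference, a minimal usage sketch of the renamed API. It assumes the post-rename module layout this patch implies (`crewai.tasks.task_guardrail`); the new module itself is not shown in the diff, and `must_be_json` is a hypothetical helper illustrating the callable-guardrail contract described in docs/concepts/tasks.mdx:

import json

from crewai import Task
from crewai.llm import LLM
from crewai.tasks.task_guardrail import TaskGuardrail
from crewai.tasks.task_output import TaskOutput


def must_be_json(task_output: TaskOutput):
    # Hypothetical callable guardrail: Task calls it with the TaskOutput and
    # expects (True, validated_result) on success or (False, "error message")
    # on failure, per the tuple contract in docs/concepts/tasks.mdx.
    try:
        return True, json.loads(task_output.raw)
    except json.JSONDecodeError as exc:
        return False, f"Output is not valid JSON: {exc}"


task = Task(
    description="Generate JSON data",
    expected_output="Valid JSON object",
    # Any of three forms should work: a callable such as must_be_json, a plain
    # string (which Task._process_guardrail wraps in a TaskGuardrail), or an
    # explicit TaskGuardrail instance as below.
    guardrail=TaskGuardrail(
        description="Ensure the response is a valid JSON object",
        llm=LLM(model="gpt-4o-mini"),
    ),
)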
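
Likewise, a sketch of observing the renamed events, following the handler pattern used in tests/test_task_guardrails.py (the handler names here are illustrative):

from crewai.utilities.events import (
    TaskGuardrailCompletedEvent,
    TaskGuardrailStartedEvent,
)
from crewai.utilities.events.crewai_event_bus import crewai_event_bus


@crewai_event_bus.on(TaskGuardrailStartedEvent)
def on_guardrail_started(source, event):
    # The started event carries the guardrail (string or callable) and retry_count.
    print(f"guardrail started (retry {event.retry_count}): {event.guardrail}")


@crewai_event_bus.on(TaskGuardrailCompletedEvent)
def on_guardrail_completed(source, event):
    # The completed event carries success, result, an optional error, and retry_count.
    status = "passed" if event.success else f"failed: {event.error}"
    print(f"guardrail {status} with result: {event.result}")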