From 91b618b4e025a18a0634ac5a3088222d7ec9811a Mon Sep 17 00:00:00 2001 From: Lucas Gomide Date: Mon, 21 Apr 2025 18:59:56 -0300 Subject: [PATCH] feat: support to define a guardrail task no-code --- docs/concepts/tasks.mdx | 38 ++ src/crewai/task.py | 61 +- src/crewai/tasks/guardrail_task.py | 154 ++++++ src/crewai/utilities/events/__init__.py | 4 + src/crewai/utilities/events/event_types.py | 6 + .../utilities/events/guardrail_task_events.py | 28 + .../test_guardrail_emits_events.yaml | 307 ++++++++++ tests/cassettes/test_guardrail_using_llm.yaml | 522 ++++++++++++++++++ tests/test_task_guardrails.py | 202 ++++++- 9 files changed, 1307 insertions(+), 15 deletions(-) create mode 100644 src/crewai/tasks/guardrail_task.py create mode 100644 src/crewai/utilities/events/guardrail_task_events.py create mode 100644 tests/cassettes/test_guardrail_emits_events.yaml create mode 100644 tests/cassettes/test_guardrail_using_llm.yaml diff --git a/docs/concepts/tasks.mdx b/docs/concepts/tasks.mdx index a6eaf59b3..956c0cff7 100644 --- a/docs/concepts/tasks.mdx +++ b/docs/concepts/tasks.mdx @@ -322,6 +322,14 @@ blog_task = Task( - On success: it returns a tuple of `(bool, Any)`. For example: `(True, validated_result)` - On Failure: it returns a tuple of `(bool, str)`. For example: `(False, "Error message explain the failure")` +### GuardrailTask + +The `GuardrailTask` class provides a sophisticated way to generate and execute validation code for task outputs. Here's how it works: + +#### Code Execution + +The generated code can be executed in two ways: Docker container (Default, Recommended) or current environment (unsafe mode) + ### Error Handling Best Practices 1. 
**Structured Error Responses**: @@ -750,6 +758,8 @@ Task guardrails provide a powerful way to validate, transform, or filter task ou ### Basic Usage +#### Define your own logic to validate + ```python Code from typing import Tuple, Union from crewai import Task @@ -769,6 +779,34 @@ task = Task( ) ``` +#### Leverage a no-code approach for validation + +```python Code +from crewai import Task + +task = Task( + description="Generate JSON data", + expected_output="Valid JSON object", + guardrail="Ensure the response is a valid JSON object" +) +``` + +#### Use custom models for code generation + +```python Code +from crewai import Task +from crewai.llm import LLM + +task = Task( + description="Generate JSON data", + expected_output="Valid JSON object", + guardrail=GuardrailTask( + description="Ensure the response is a valid JSON object", + llm=LLM(model="gpt-4o-mini"), + ) +) +``` + ### How Guardrails Work 1. **Optional Attribute**: Guardrails are an optional attribute at the task level, allowing you to add validation only where needed. diff --git a/src/crewai/task.py b/src/crewai/task.py index 9874b5100..0aa62abb4 100644 --- a/src/crewai/task.py +++ b/src/crewai/task.py @@ -140,7 +140,7 @@ class Task(BaseModel): default=None, ) processed_by_agents: Set[str] = Field(default_factory=set) - guardrail: Optional[Callable[[TaskOutput], Tuple[bool, Any]]] = Field( + guardrail: Optional[Union[Callable[[TaskOutput], Tuple[bool, Any]], str]] = Field( default=None, description="Function to validate task output before proceeding to next task", ) @@ -157,8 +157,12 @@ class Task(BaseModel): @field_validator("guardrail") @classmethod - def validate_guardrail_function(cls, v: Optional[Callable]) -> Optional[Callable]: - """Validate that the guardrail function has the correct signature and behavior. 
+ def validate_guardrail_function( + cls, v: Optional[str | Callable] + ) -> Optional[str | Callable]: + """ + If v is a callable, validate that the guardrail function has the correct signature and behavior. + If v is a string, return it as is. While type hints provide static checking, this validator ensures runtime safety by: 1. Verifying the function accepts exactly one parameter (the TaskOutput) @@ -171,16 +175,16 @@ class Task(BaseModel): - Clear error messages help users debug guardrail implementation issues Args: - v: The guardrail function to validate + v: The guardrail function to validate or a string describing the guardrail task Returns: - The validated guardrail function + The validated guardrail function or a string describing the guardrail task Raises: ValueError: If the function signature is invalid or return annotation doesn't match Tuple[bool, Any] """ - if v is not None: + if v is not None and callable(v): sig = inspect.signature(v) positional_args = [ param @@ -408,9 +412,7 @@ class Task(BaseModel): ) if self.guardrail: - guardrail_result = GuardrailResult.from_tuple( - self.guardrail(task_output) - ) + guardrail_result = self._process_guardrail(task_output) if not guardrail_result.success: if self.retry_count >= self.max_retries: raise Exception( @@ -464,13 +466,52 @@ class Task(BaseModel): ) ) self._save_file(content) - crewai_event_bus.emit(self, TaskCompletedEvent(output=task_output, task=self)) + crewai_event_bus.emit( + self, TaskCompletedEvent(output=task_output, task=self) + ) return task_output except Exception as e: self.end_time = datetime.datetime.now() crewai_event_bus.emit(self, TaskFailedEvent(error=str(e), task=self)) raise e # Re-raise the exception after emitting the event + def _process_guardrail(self, task_output: TaskOutput) -> GuardrailResult: + if self.guardrail is None: + raise ValueError("Guardrail is not set") + + from crewai.utilities.events import ( + GuardrailTaskCompletedEvent, + GuardrailTaskStartedEvent, + ) + from 
crewai.utilities.events.crewai_event_bus import crewai_event_bus + + crewai_event_bus.emit( + self, + GuardrailTaskStartedEvent( + guardrail=self.guardrail, retry_count=self.retry_count + ), + ) + + if isinstance(self.guardrail, str): + from crewai.tasks.guardrail_task import GuardrailTask + + result = GuardrailTask(description=self.guardrail, task=self)(task_output) + else: + result = self.guardrail(task_output) + + guardrail_result = GuardrailResult.from_tuple(result) + + crewai_event_bus.emit( + self, + GuardrailTaskCompletedEvent( + success=guardrail_result.success, + result=guardrail_result.result, + error=guardrail_result.error, + retry_count=self.retry_count, + ), + ) + return guardrail_result + def prompt(self) -> str: """Prompt the task. diff --git a/src/crewai/tasks/guardrail_task.py b/src/crewai/tasks/guardrail_task.py new file mode 100644 index 000000000..6244d5cbd --- /dev/null +++ b/src/crewai/tasks/guardrail_task.py @@ -0,0 +1,154 @@ +from typing import Any, Tuple + +from crewai.llm import LLM +from crewai.task import Task +from crewai.tasks.task_output import TaskOutput +from crewai.utilities.printer import Printer + + +class GuardrailTask: + """A task that validates the output of another task using generated Python code. + + This class generates and executes Python code to validate task outputs based on + specified criteria. It uses an LLM to generate the validation code and provides + safety guardrails for code execution. + + Args: + description (str): The description of the validation criteria. + task (Task, optional): The task whose output needs validation. + llm (LLM, optional): The language model to use for code generation. + additional_instructions (str, optional): Additional instructions for the guardrail task. + + Raises: + ValueError: If no valid LLM is provided. 
+ """ + + def __init__( + self, + description: str, + task: Task | None = None, + llm: LLM | None = None, + unsafe_mode: bool = False, + additional_instructions: str = "", + ): + self.description = description + self.unsafe_mode: bool = unsafe_mode + + fallback_llm: LLM | None = ( + task.agent.llm + if task is not None + and hasattr(task, "agent") + and task.agent is not None + and hasattr(task.agent, "llm") + else None + ) + self.llm: LLM | None = llm or fallback_llm + + self.additional_instructions = additional_instructions + + @property + def system_instructions(self) -> str: + """System instructions for the LLM code generation. + + Returns: + str: Complete system instructions including security constraints. + """ + security_instructions = ( + "- DO NOT wrap the output in markdown or use triple backticks. Return only raw Python code." + "- DO NOT use `exec`, `eval`, `compile`, `open`, `os`, `subprocess`, `socket`, `shutil`, or any other system-level modules.\n" + "- Your code must not perform any file I/O, shell access, or dynamic code execution." + ) + return ( + "You are a expert Python developer" + "You **must strictly** follow the task description, use the provided raw output as the input in your code. " + "Your code must:\n" + "- Return results with: print((True, data)) on success, or print((False, 'very detailed error message')) on failure. Make sure the final output is beign assined to 'result' variable.\n" + "- Use the literal string of the task output (already included in your input) if needed.\n" + "- Generate the code **following strictly** the task description.\n" + "- Be valid Python 3 — executable as-is.\n" + f"{security_instructions}\n" + "Additional instructions (do not override the previous instructions):\n" + f"{self.additional_instructions}" + ) + + def user_instructions(self, task_output: TaskOutput) -> str: + """Generates user instructions for the LLM code generation. + + Args: + task_output (TaskOutput): The output to be validated. 
+ + Returns: + str: Instructions for generating validation code. + """ + return ( + "Based on the task description below, generate Python 3 code that validates the task output. \n" + "Task description:\n" + f"{self.description}\n" + "Here is the raw output from the task: \n" + f"'{task_output.raw}' \n" + "Use this exact string literal inside your generated code (do not reference variables like task_output.raw)." + "Now generate Python code that follows the instructions above." + ) + + def generate_code(self, task_output: TaskOutput) -> str: + """Generates Python code for validating the task output. + + Args: + task_output (TaskOutput): The output to be validated. + + Returns: + str: Generated Python code for validation. + """ + if self.llm is None: + raise ValueError("Provide a valid LLM to the GuardrailTask") + + response = self.llm.call( + messages=[ + { + "role": "system", + "content": self.system_instructions, + }, + { + "role": "user", + "content": self.user_instructions(task_output=task_output), + }, + ] + ) + + printer = Printer() + printer.print( + content=f"The following code was generated for the guardrail task:\n{response}\n", + color="cyan", + ) + return response + + def __call__(self, task_output: TaskOutput) -> Tuple[bool, Any]: + """Executes the validation code on the task output. + + Args: + task_output (TaskOutput): The output to be validated. 
+ + Returns: + Tuple[bool, Any]: A tuple containing: + - bool: True if validation passed, False otherwise + - Any: The validation result or error message + """ + import ast + + from crewai_tools import CodeInterpreterTool + + code = self.generate_code(task_output) + result = CodeInterpreterTool(code=code, unsafe_mode=self.unsafe_mode).run() + + error_messages = [ + "Something went wrong while running the code", + "No result variable found", # when running in unsafe mode, the final output should be stored in the result variable + ] + + if any(msg in result for msg in error_messages): + return False, result + + if isinstance(result, str): + result = ast.literal_eval(result) + + return result diff --git a/src/crewai/utilities/events/__init__.py b/src/crewai/utilities/events/__init__.py index 264f0ac5e..8fe14e8cb 100644 --- a/src/crewai/utilities/events/__init__.py +++ b/src/crewai/utilities/events/__init__.py @@ -9,6 +9,10 @@ from .crew_events import ( CrewTestCompletedEvent, CrewTestFailedEvent, ) +from .guardrail_task_events import ( + GuardrailTaskCompletedEvent, + GuardrailTaskStartedEvent, +) from .agent_events import ( AgentExecutionStartedEvent, AgentExecutionCompletedEvent, diff --git a/src/crewai/utilities/events/event_types.py b/src/crewai/utilities/events/event_types.py index 2ea514f37..f96cf564b 100644 --- a/src/crewai/utilities/events/event_types.py +++ b/src/crewai/utilities/events/event_types.py @@ -23,6 +23,10 @@ from .flow_events import ( MethodExecutionFinishedEvent, MethodExecutionStartedEvent, ) +from .guardrail_task_events import ( + GuardrailTaskCompletedEvent, + GuardrailTaskStartedEvent, +) from .llm_events import ( LLMCallCompletedEvent, LLMCallFailedEvent, @@ -68,4 +72,6 @@ EventTypes = Union[ LLMCallCompletedEvent, LLMCallFailedEvent, LLMStreamChunkEvent, + GuardrailTaskStartedEvent, + GuardrailTaskCompletedEvent, ] diff --git a/src/crewai/utilities/events/guardrail_task_events.py b/src/crewai/utilities/events/guardrail_task_events.py new 
file mode 100644 index 000000000..90f48a256 --- /dev/null +++ b/src/crewai/utilities/events/guardrail_task_events.py @@ -0,0 +1,28 @@ +from typing import Any, Callable, Optional, Union + +from pydantic import BaseModel + +from crewai.utilities.events.base_events import BaseEvent + + +class GuardrailTaskStartedEvent(BaseEvent): + """Event emitted when a guardrail task starts + + Attributes: + guardrail: The guardrail being run, either a string describing it or the validation callable + retry_count: The task's retry count at the time the guardrail starts + """ + + type: str = "guardrail_task_started" + guardrail: Union[str, Callable] + retry_count: int + + +class GuardrailTaskCompletedEvent(BaseEvent): + """Event emitted when a guardrail task completes""" + + type: str = "guardrail_task_completed" + success: bool + result: Any + error: Optional[str] = None + retry_count: int diff --git a/tests/cassettes/test_guardrail_emits_events.yaml b/tests/cassettes/test_guardrail_emits_events.yaml new file mode 100644 index 000000000..2165a94fe --- /dev/null +++ b/tests/cassettes/test_guardrail_emits_events.yaml @@ -0,0 +1,307 @@ +interactions: +- request: + body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour + personal goal is: Test Goal\nTo give my best complete final answer to the task + respond using the exact following format:\n\nThought: I now can give a great + answer\nFinal Answer: Your final answer must be the great and the most complete + as possible, it must be outcome described.\n\nI MUST use these formats, my job + depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis + is the expected criteria for your final answer: Output\nyou MUST return the + actual complete content as the final answer, not a summary.\n\nBegin! 
This is + VERY important to you, use the tools available and give your best Final Answer, + your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '807' + content-type: + - application/json + cookie: + - __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc; + _cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.12 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jFfNjiPHDb7PUxA6LiRhZ3Z2xpnbOHCQWSNY2J4gQbLGgKpid9NTzWoX + qyRrjQX2IXzJ6+2TBKzu1s9mDrlIrWYVi/zI7yvq9wuABfvFHSxch9n1Q1h9+z7/8MM/dv/8E3H5 + fqdvdLh5//3mXx+/+/av/XeLpe2Im1/I5XnX2sV+CJQ5ymh2iTCTeb28vX57dXt1c3VTDX30FGxb + O+TVdVz1LLy6en11vXp9u7r8ZtrdRXakizv49wUAwO/10+IUT78t7uD1cn7Tkyq2tLg7LAJYpBjs + zQJVWTNKXiyPRhclk9TQH0DiDhwKtLwlQGgtbEDRHSWAD/IXFgxwX3/fwb0CwiNphvuWJC/hATrc + EmyIBDLqM3nYce5gSHHLnqUFhAkZgkQ6RFGC3GGGnigr5I6AfhvIZfLgEmdKjNDEBLHkoeQ1PHaU + qImJlmBPwOOmpoQAUyb1zFgyaOl7TPwRrQ5L8JSRA3lAhUS/Fk7k1x/kgzxI9ZEIQw+xgUyaWdol + DJgyuxIwhT1obPIOEx3NSltKGGqg7Owh9kMUkqz2SMCSIwwB9zVuaIp47EkyBsj7gfTkMGBxoXiC + Tclgp0jMELhnQyLHO4vzcg2vXv1dOFfQWdpXr+7gsWOdUWDhzBhg6FAJdiNA4nnLvpyHFxP00Zcw + xdDRMb05RfLAAqwxVPzWUE82i0JHYQASLWkqH6HrKlyzuyHFNmFvOTvbboikRC6HPUQBthh2soRd + 
x64DZK92msPsOkNjU1oFqrjzWB1PWwpxMPjMuSPVWrwrA+VBMrWpxnmKzX2TKUGZ465F45OlM/ZN + DCHudD2COaLXRFfU4BHYUuJmbwtHkDMlrEnBhvLOmn2LiWPRrxDWvWbqFVD8CNboArP52cMupmfI + saXcUbKmNM/ip6Z8Y3n9VD2cpvReHB3CsDyMKMdTT/hXS7icgjhps20MW1LYYmCPec7qQEsL1ti0 + Pz3h0BxDir64PCE1doCOOfEpiXUgxw2Tn5nW1/DMuaWtlu6B6CyAlX2rXUzBsNpyimJbKhTXBsW9 + czRktPRf6v79YAwM+1ENqkyNdTyhmJ3OCkVLXekwJYvQpGKzBxK/Kkqpls4FtoBH3rYRQz0kzqiN + JSiKGw6c9xNoY6djfTOzakQfW2TRDJuiLKR6hsoaTnKbY3VRGk69HvrlWAO27ej3VRc9DSHuKyss + iGMFLJUvn/+jIER+pMpjhAadRTxloMfzJkqRAjUNucxbCvvlobFzjEGrNIQqei35io15qCa7NCYJ + q1q1spKVHPvaP1PF4NHW1rrVTYGfCX6iQMKlr+nsaAM4DIEdjqoRE7yrwmPWd7jFczMaccdeT7VA + OJ0JiQbKbHkccrQrSWfJYbE7eUqYHZO4sY6JfHE0gjhEVZ5qHBvoSo8ClFJM6znJev/9DQXbWs2v + U9Ri8qbw7uHHe0vGlv+IHCqWBrhUxbbsckL3XOM0l1VASXUJvTk/vHcW83KKdIgpzxb7jiXbxaOH + 6P4cJbMUK+GZRr5Qh3ckzyyTthslRyRngZgaxGBIRWQ+tTL5VPjPdJoHCiy0/Er+UPbgOpSWFHr0 + NGu8i542RloDxx1CN1ZXNZtvaxet07Te6rmLKZa2O1a5G3/PrasZW9Ivn/+we+BM/2dxHNHEAwm/ + fP7DxX7DMs8vlX7ZpLBlZ8yyZKvEYeCP9YKOQZcQcD+NI9Gu+gr1SNLAdolICx233erXMorETOk1 + PORzMZ0QN62tk0CUsH/xLq3jQtA4Md9YPwnryBAbYrZzDXc4ioYWZ7VsSgA22a8zyUQo8aCYWat4 + zzJ4Oi8maoqizaxSQjgxoEic3Nik+vNk+XSYTUNshxQ3+tXWRcPC2j0ZHaPYHKo5Dotq/XQB8HOd + gcvZWLsYUuyH/JTjM9XjLt++Gf0tjqP30Xr99nay5pgxHA03l9PofO7waZwV9WSMXjh0Hfnj1uPM + jcVzPDFcnKT9v+G85HtMnaX9f9wfDbVjyT8NiTy785SPyxL9Uu/Zl5cdYK4BL5TSlh09ZaZkpfDU + YAnjH4bFSJenhqWlNCQe/zU0w1Nz69/e4Dd45RYXny7+CwAA//8DAOzQwR9DDQAA + headers: + CF-RAY: + - 93402298d9980110-GRU + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Mon, 21 Apr 2025 21:57:12 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - crewai-iuxna1 + openai-processing-ms: + - '6385' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + 
x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999832' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_2a19c29e1e9dd766289937937418044a + status: + code: 200 + message: OK +- request: + body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour + personal goal is: Test Goal\nTo give my best complete final answer to the task + respond using the exact following format:\n\nThought: I now can give a great + answer\nFinal Answer: Your final answer must be the great and the most complete + as possible, it must be outcome described.\n\nI MUST use these formats, my job + depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis + is the expected criteria for your final answer: Output\nyou MUST return the + actual complete content as the final answer, not a summary.\n\nThis is the context + you''re working with:\n### Previous attempt failed validation: bad result\n\n\n### + Previous result:\nAs a Test Agent, I have been tasked with providing a complete + response that meets the expected criteria for output. Therefore, here is the + full content without summarization, detailed as required.\n\nIn the realm of + testing, particularly software testing, several critical components come into + play. The fundamental types of testing include but are not limited to:\n\n1. + **Unit Testing**: This is the initial phase where individual components or modules + of the software are tested in isolation. Unit tests help ensure that each part + of the program functions correctly on its own, which aids in catching bugs early + in the development process.\n\n2. **Integration Testing**: After unit testing, + integration testing follows. 
This phase focuses on verifying the interaction + between various components or systems and ensuring that they work together as + intended.\n\n3. **System Testing**: Once the integrated components have been + tested, system testing involves validating the complete and fully integrated + software product. This ensures that it meets the specified requirements and + works as expected in a real-world environment.\n\n4. **Acceptance Testing**: + This is typically the final phase of testing and is usually carried out by end-users + or clients. The goal is to validate the usability and functionality of the system + against business requirements. Acceptance testing confirms that the software + is ready for deployment and meets the user\u2019s needs.\n\nTo facilitate these + testing processes effectively, various tools are leveraged. These tools can + include:\n\n- **Automated Testing Tools**: Tools like Selenium for web applications + or JUnit for Java applications allow testers to automate repetitive testing + tasks, which increases efficiency and reduces the possibility of human error.\n\n- + **Test Management Tools**: Tools such as JIRA or TestRail are essential for + tracking test progress, managing test cases, and reporting testing outcomes.\n\n- + **Continuous Integration Tools**: Tools like Jenkins help in automating the + process of running tests as part of the development pipeline, ensuring that + any changes made in the codebase are continuously tested.\n\nIn conclusion, + thorough testing through various stages\u2014unit, integration, system, and + acceptance\u2014combined with the strategic use of specialized tools, lays the + foundation for delivering high-quality software. It ensures that the product + not only functions correctly but also meets user expectations, paving the way + for successful implementations and satisfied clients.\n\n\nTry again, making + sure to address the validation error.\n\nBegin! 
This is VERY important to you, + use the tools available and give your best Final Answer, your job depends on + it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '3539' + content-type: + - application/json + cookie: + - __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc; + _cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.12 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA3RXTY/cuBG9768ojA9ZG90Ne3YSB3ObDJxgjMTe2G0ESHxhkyWpdiiWzI9uy/vn + gypKavXYexn0SGSx+F69V6XffwK4Ind1C1e2M9n2g9/+7X3+93+af7rxtw+PN2OPx5tCzc2bb+Px + v+/91UZ28OE3tHnetbPcDx4zcaivbUSTUaK+en3z5+vX13/55Vpf9OzQy7Z2yNsb3vYUaHv98vpm + +/L19tVfp90dk8V0dQv/+wkA4Hf9K3kGh1+vbuHlZn7SY0qmxavbZRHAVWQvT65MSpSyCflqc35p + OWQMmvoDBD6BNQFaOiIYaCVtMCGdMAJ8Dn+nYDzc6f+38BAgdwgRje+BG0jc5JOJCBlTptBuwIAA + EbHDkDTiMEQ2tgNKgClhyGQ8ZAYMqcjGTo4bBk/WCHgJesQMX4rxlEeQ3J2JLoEJDpriG/IeSsII + +HVAm+umHew7hIa95xOFFrhkTwGTZtuU4EyPIRuvyXHAkJOkL2+fXgE8NWhH63ED2A+dSfRNHh9N + JC4JPB7R193TBslMIq2vxz5ByeTpG8rLyKXtuGRdN0S2mNLuc/gcnj17BvtxQA34cU5lXyPLglc7 + ePHiU6A8P3zx4hb2HSXI44DrNLKJLeYEFBwdyRXj5ea2gsoReswduwQnyh1VHi07PJiET/lAIazI + oQPGhmOfwKQJcHQ7eMhCp2JLMWUQrCUVhw2GhGBaQyFlOJQ2bQCDOXjJ0Al0PGBMcqA12XaAMXJM + 
cMCG9fQVIUPkNgqoutybjFHqocW0A0VEbp5AuRsHssb7EUzJ3IvwoCQ5s4mmxxPHxwSeHhHe6s6G + I7w1RyO4vFue7N692W/AzEX0pZB9hCNGaqbiBFeiQt3hfBmpqwtGr4Wwh5CxjXXPire7Ru5Q5txV + MJT/lMDGYidd6HkjdHwCR02DUQ7o2RWPwm3GaGxWEoEDggmcO4w7WB+5VKaAJ/idOpRVUv4HCujW + OhBwIHM7r4gRbfbjBshJOTejRNKDG2NROba56pFSKpqVIjInJxkcMJ8Qgz6TrISPQFmFSglEVqi2 + Q+HI/ohw4NxBb4KUrYQ+87jUN9ou0Bc5sFJbVaas/sop9yYoi3e/Pix7OCr87/5xZnztNUrYL0LY + xzFl7L/TWM1zyjGdU1HtqN+jW0cUmRg4dexxB1PMeRMejS8mT6aEwW0zbzE4SHVdGtAulaaySXO1 + mXypjAN2RvJZiRKMjZzSYlPJYpCflSjLwdHklA8ZMEj2Wh2LR0y4Bw7b1aNzmQbri5N0JkswweIG + SjIHEqfe6O6EtkTx7QUphraYaEJGPF9E6irifHFxBMUl9cy586MUFOVa7cGhAwxHihxEa0rZjVB2 + Zy0OWdJY03bPwRUF5OwJh1Hhlq6hTpiyecSOvcOYNmDOceasHWaMvTaQWTmKf81XGlQlcbXVRsoY + yUy6kBbpRq06h4PnUXOvRdXQCtm5xBIMdGRpUbKnmsCPyZ+aYIKDyACTnPWlUMRe5Szna38MiC7t + 4J77Xgpz6sMqV6ES4ZOs+h5F+PnT3f65xnmvzNRK+NHC93f750sbe7O0v/n9XgQqr/cM2DRkCUP2 + o8jctNXsFxSqg6LwoTWMedQBQ0RhpjaqehcIsBdI0d1K8K3UwuIWF2er59ZXIk46G62AjKEzwc56 + TgOKioKDiJ6mql71110NCanYToT3ET0GKr2GOuHhcoiZ7QsiDpgpyyik7WoDp448rs3rbhgkjrjh + AYU7pzF7PshCMwxnFSZqg3qE4kj9EPmoFygxmMhFJhHqsVaBsbZEY8fdjJJgA/9S8LVtLRjtz8m8 + ffhwN9vmB0Ne8ZaRIg0Ykjl41OQ4tibUwWjxt6bhWB0eE8LgTa6jQ47GPuqypaVvlhqQp1YU4NgW + HdIEwOomjbHCgsBo2Xtz4Km/mZ71YCNi7A8q48UurTdqQQqBp1Zt4+kM9qTuFojuOWQKRRz0oofP + SP3B+5/vH55XSjdLgbzF8Eh19LqnaD3eP2zOdbFOghudxMB2JrQTeSuN0OqgKV+xtFhCmPFPdWQT + 7msoKXXue8o6r9Vepr3ZInToh3Q5f4cRAp7UeXNkV6yOCE5YwQSBMxwimkfAr1RTPrcI9f6UI5re + U/jRdCQDRuP5tDjFPYsDJeKg5jA10/M3wx+P5BedSwaKzRqdzeTQm7n8n/j6pqKUMKpoDDi9jc0w + Yq7ji+UQaj8dShxYmz849CSOHFroqO22y8fJlKeYbBk8ujqWqZtkaWkt2YvpgJsfzTW1bFQJ/Vmc + iX2ZJgG5zf3DslCH/461ns/fV5XVeWabjMxOv0cYOE/uPA1t08S97k6ffCbJTYY/bGTMm21L9a0F + k37QkaQ+OIixa2dcjQ8XnelQMuBXK23p+y+4Oj6UYRAPEQ1JmTfFT9NBDcclW+5xniIdeWpX8zd4 + M9bG3KoX6mQrblUd3V9+W7hi/w8AAP//jJhBbsQgDEX3OUXEASp12s70MiPkgkndMoDALHP3CsIE + 0s6i6weOfyKI/+emZkFXR5BZ5cT+VnwGMCXThtnS2v6zba09jYY6oskJiql32doBgHO+CSxW/trI + upt365cQ/Uf6tVUYcpQ+ZURI3hWjntgHUek6zfO1hgT54PtFiP4WWLL/xvq480sLCUTPJgb6eqdc + 
xo4Onk+XOzlUlBoZyKYhaBCqTBS67+2pBGRNfgDToPtvP49qb9rJLf8p30E99ahliKhJHTX3ZRG/ + 6jF/vGx/z7VhUS8MhZIJY/kWGg1ku0UqYrtypCG3YAyRtlzFBGku+u0M73BSYlqnHwAAAP//AwDv + DD9WZRIAAA== + headers: + CF-RAY: + - 934022c27c860110-GRU + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Mon, 21 Apr 2025 21:57:22 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - crewai-iuxna1 + openai-processing-ms: + - '9187' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999158' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_94bb40dead4c4e9c7fa12de3bfb636b7 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/test_guardrail_using_llm.yaml b/tests/cassettes/test_guardrail_using_llm.yaml new file mode 100644 index 000000000..41f311047 --- /dev/null +++ b/tests/cassettes/test_guardrail_using_llm.yaml @@ -0,0 +1,522 @@ +interactions: +- request: + body: '{"messages": [{"role": "system", "content": "You are Test Agent. 
Test Backstory\nYour + personal goal is: Test Goal\nTo give my best complete final answer to the task + respond using the exact following format:\n\nThought: I now can give a great + answer\nFinal Answer: Your final answer must be the great and the most complete + as possible, it must be outcome described.\n\nI MUST use these formats, my job + depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis + is the expected criteria for your final answer: Output\nyou MUST return the + actual complete content as the final answer, not a summary.\n\nBegin! This is + VERY important to you, use the tools available and give your best Final Answer, + your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '807' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.12 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jFTBbuQ2DL3PVxA6e4KJm+zszq0ttkBORYuiPbSLASPRNndlSRHpmcwu + 8u+FZCcz2ebQi2H78ZGPj6K+rQAMO7MDYwdUOya//ulX/a3tf/+Z3f3jX/xxdGGDfz60p7uvx+mj + aQoj3n8mq8+sKxvH5Ek5hhm2mVCpZL3e3ty22/bdpq3AGB35QuuTrm/ieuTA63bT3qw32/X1+4U9 + RLYkZgd/rwAAvtVn0RkcPZodbJrnPyOJYE9m9xIEYHL05Y9BERbFoKY5gzYGpVCl30GIR7AYoOcD + AUJfZAMGOVIG+Cf8wgE9/Fi/d/DHQKAoXwAVBgwOMj1MnEkAoRiQaaAgNVNw4EiRPZUoSTEIgQ6o + MBKpgA4E9JjIKjmwmZUyI3QxQ5w0TXoFdwG6yXfsPYcedGB5LjdS0AZYgQVIhIIyetAINh4oA3oP + 
mTwdMCiglBpSBaUcD+wIllmVl+pEAxRkynMZ1KqNQxfziGWgpYz1hLkpBMtCTU2H1k4ZlfwJ0Llc + lMx9BSInELv6Ufy6Ks5l6mKmBu7gyN6XNj0HWgi2TDGfqrQYKKg0gK6QSl8YTtBP7Kgw5LkPN8uo + 2umsvKsjm10s0nWIOU79UIOP5P06ZSqmlQRH1iFOCnFk1WIAjynmcmKW8QnEDDKNI2b+Ojv0ypzS + GgtgSjmiHebe+gkzBqVyogbuh/XDhJ719N1JEFSWjhfXFkPccixqdrm6PLiZukmwLE+YvL8AMIS4 + EMrKfFqQp5cl8bFPOd7Ld1TTcWAZ9plQYigLIRqTqejTCuBTXcbp1X6ZlOOYdK/xC9Vy17c/zPnM + +Q64QK8/LKhGRX8G2m3bvJFwv9h+sc/Goh3Inann5cfJcbwAVhdt/1fOW7nn1jn0/yf9GbCWkpLb + p0yO7euWz2GZPtcFfzvsxeYq2AjlA1vaK1Muo3DU4eTnm8vISZTGfcehp5wyz9dXl/bd1t2+w/fY + WrN6Wv0LAAD//wMAAfXtOswFAAA= + headers: + CF-RAY: + - 934022059c2c0110-GRU + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Mon, 21 Apr 2025 21:56:45 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc; + path=/; expires=Mon, 21-Apr-25 22:26:45 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - crewai-iuxna1 + openai-processing-ms: + - '2377' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999832' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_f39581c88a83855cf77c06098b787948 + status: + code: 200 + message: OK +- request: + body: 
'{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour + personal goal is: Test Goal\nTo give my best complete final answer to the task + respond using the exact following format:\n\nThought: I now can give a great + answer\nFinal Answer: Your final answer must be the great and the most complete + as possible, it must be outcome described.\n\nI MUST use these formats, my job + depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis + is the expected criteria for your final answer: Output\nyou MUST return the + actual complete content as the final answer, not a summary.\n\nThis is the context + you''re working with:\n### Previous attempt failed validation: bad result\n\n\n### + Previous result:\nThe task at hand requires a comprehensive and detailed response + that meets the expected criteria for output. In fulfilling this requirement, + it is essential to cover all relevant aspects and provide complete content, + ensuring that the information is clear, concise, and accurately addresses the + needs of the task. Therefore, I will outline the necessary components, adhere + to any guidelines provided, and ensure that the final output is thorough and + well-presented, without omitting important details or summarizing the information. + This approach will guarantee a high-quality response that satisfies the outlined + expectations.\n\n\nTry again, making sure to address the validation error.\n\nBegin! 
+ This is VERY important to you, use the tools available and give your best Final + Answer, your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": + ["\nObservation:"]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '1619' + content-type: + - application/json + cookie: + - __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc; + _cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.12 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4xWTW8cNwy9+1cQczTWi83GjhPf3CQFjKJI27hoizowuBJnhrZGUkVq15sg/72Q + ZvbDaQ69GOshRb1HPpL6cgLQsG2uoDE9qhmiO/vhg/56YR5uPy4uzfvFm5//ePfnb8vPf70y4Z+f + PjSzciKsHsjo7tTchCE6Ug5+NJtEqFSivrg8v1heLl8tLqphCJZcOdZFPTsPZwN7Plsuludni8uz + F6+n031gQ9Jcwd8nAABf6t+C01t6aq5gMdt9GUgEO2qu9k4ATQqufGlQhEXRazM7GE3wSr5CvwEf + NmDQQ8drAoSuwAb0sqEEcOd/ZI8Oruv/V3DbEyjKI6BCj96CJ0MirKgkgKB9SCF3PRSbJUV2ZCGR + xOCFQAOQl5wIarYYvSHYsPagPQE9RTJKFkxipcQIbUgQssas83p1TDxg2sKY+gKYpQSNKazZEvig + 8JBFAUHyUF1XWWvwqTzlRyUPiVypTzmuE6sZoLWp8PEdoHPFh9boFVAKNKm00PaUiocGQL/dXW6h + y2zJsSeZw52/87cB0PRMawLtWWb1njY4FzbleEEUPHkV2LBzsCJgb1wuodhX513irmq8F3M4Pb3x + moLNpgjt9PQK3jrC5LZgqWVP9VTMKQYhCO2eWQUesrqdj3DnuWVTKxBaKMDXIy2syBL15KVqotZ+ + fueh6GFZQLzbVfbao9sKS0Hy/gmHEp7Q9FPC9hBCZLMXh9vOCtOQYkio5coWjcoMLCrOKtR94g0K + 
gWi2TAIhAT1hqWOtuuQYQ9JaKSkKHEoy5/C+AvCdIxiKGFZVWi6ksdo7sSC02TnI3lIqLWILkgmw + 5KowGFC1cr/zLwvxtw4T67Zi/CWRYZnK8LsQmFKKaorVRODQdxm7kvLKPExiPG4IE/yatsC+DWnA + Ulagth3l7bZzuF4HtoDDiru8u3pqIu1Rx3THwF5LM2BSNnlUNntA2OB29GMBQtmOV071tZXZeWUW + ivZ2dD7W7uHPo1geaQst1wSNLcBeuOtVwCbceGhTGKrjvuPHfI9sjjUwg0SVqakN1BPwUGq4k2H9 + cpSJmEjIFzL1WpWdMgwdN26lcVFoXFvL5SQ6+I0k5GSoivOmBYzRscGVo9mu0SBRS4m8GcXl2D9W + ZbU5aU8JEmFVxYB1HjkZU1nG5V5FhwvrZHnSEmlKELCfUI5CErAsJovQmPnbURZ1PIBoykZzUSnG + mAKafgY342zoMib0SkW1B/EULAORSr1iam87zdGaQIFNz44mtGN3Jy6Bd6Ua+60PjkXZwJpp83xw + jIN+Djc6QimNXSeHc1vAKs6YaM0hC0TWFp0TkFxnAKyxy0dtMGrn21FcB2Oi1XbU9YhSUFnKXAip + ig/dbhMcb7JEbRYs29Rn544M6H2YUlB26KfJ8nW/NV3oYgor+eZo07Jn6e8ToQRfNqRoiE21fj0B + +FS3c362cJuYwhD1XsMj1euWb5ZjvObwKDhYXy5eT1YNiu5geLWYlvrzgPdjqeRowTcGTU/2cPTw + GsBsORwZTo5o/xfO92KP1Nl3/yf8wWAMRSV7HxNZNs8pH9wSPdQd/323fZor4EYordnQvTKlUgpL + LWY3PmUa2YrScN+y7yjFxON7po337aW9eIWvcWmak68n/wIAAP//AwA0/RJL3QkAAA== + headers: + CF-RAY: + - 93402216690b0110-GRU + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Mon, 21 Apr 2025 21:56:49 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - crewai-iuxna1 + openai-processing-ms: + - '4451' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999631' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_8383a16d5f5b7f53d659bebf481ba936 + status: + code: 200 + message: OK +- request: + body: '{"messages": [{"role": "system", "content": "You are Test Agent. 
Test Backstory\nYour + personal goal is: Test Goal\nTo give my best complete final answer to the task + respond using the exact following format:\n\nThought: I now can give a great + answer\nFinal Answer: Your final answer must be the great and the most complete + as possible, it must be outcome described.\n\nI MUST use these formats, my job + depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis + is the expected criteria for your final answer: Output\nyou MUST return the + actual complete content as the final answer, not a summary.\n\nBegin! This is + VERY important to you, use the tools available and give your best Final Answer, + your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '807' + content-type: + - application/json + cookie: + - __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc; + _cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.12 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4xU227bRhB911cM+CwJtnyN3xygBoI+xG1doGgTCKPdITnRcpbdGUoRAgP9jf5e + v6TYpWTJjR8KEATIs3PmdvZ8mwBU7Ks7qFyL5ro+zN5/tJ/urz7cPjziw68ff3y6vXz8vb1/fLyw + n29/q6Y5Iq6+kLND1NzFrg9kHGWEXSI0yqznN5dXi5vF9flZAbroKeSwprfZZZx1LDxbnC0uZ2c3 + 
s/PbfXQb2ZFWd/DHBADgW3nnOsXT1+oOClf505EqNlTdvRwCqFIM+U+FqqyGYtX0CLooRlJK/wAS + t+BQoOENAUKTywYU3VIC+CQPLBjgvnzfwROpgaGuFTAReFJuhDxYBNpgGNAItCfHNTvQNYegEBOs + JW4D+YaABRBy/hRDIA8kG05ROhKbw1NL4BIbJUaoYzpwsjSgg3OkCiwuDJ7ABUxsO4g1sKilweXR + 6xQwcFP4YMvWQiBMkgnGdfGG8hnxYC1BoA2FTEHSYEMlylraAQV2bFCn2EGPydhxj2I6h1/23WEI + uykg2GEioG0cgn8pEMHl3CWXi+JYCfoUuz6nQIM4WGAhhW3+YgX62pMz8mPWXN7rzPfec+5xzMx2 + SNhiWdyWQph5qjnvQ11Muek0rBK7MSGqUn4K9Z8Dhv34EmkfJQMrVPIQBfpEnoxSV8gOK5nD+x2Q + 6FCoC2emOk6AFRIF2qDY2HaLIZA0LE0peMshANX1uIewg45Qh0TfNfvPX3+Dwx5XHNiYtLD1KW7Y + ExRNrELWknLTWtaExczBCQbxlLLgfdl5Xah1KLuHDs0ozeGHQwm50R4T5rFOSyM5qiNro9/LpCby + K3Rr6Mi1KKzdKP08SjHGAGPuF4HGGlB2x6lMYdtyIMCgETpkMeQiSJRT9WeN+MHloiweVZtLaFLc + Wjs/vcGJ6kExu4gMIZwAKBKt9FO84/MeeX5xixCbPsWV/ie0qllY22Ui1CjZGdRiXxX0eQLwubjS + 8MpoqlHOS4trKunOry5Gvupohqfo9R61aBiOwMXZu+kbhEtPhhz0xNgqh64lfww9uiAOnuMJMDlp + +/ty3uIeW2dp/g/9EXCOeiO/zHeG3euWj8cSfSmX++1jL2MuBVdKacOOlsaU8io81TiE0cIr3alR + t6xZGkp94tHH635Z3/ira7zFhasmz5N/AQAA//8DAAhvMU7VBgAA + headers: + CF-RAY: + - 93402233baf00110-GRU + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Mon, 21 Apr 2025 21:56:56 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - crewai-iuxna1 + openai-processing-ms: + - '6058' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999832' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_f5273114a4a797fd0928674edb442194 + status: + code: 200 + message: OK +- request: + body: '{"messages": [{"role": "system", "content": 
"You are Test Agent. Test Backstory\nYour + personal goal is: Test Goal\nTo give my best complete final answer to the task + respond using the exact following format:\n\nThought: I now can give a great + answer\nFinal Answer: Your final answer must be the great and the most complete + as possible, it must be outcome described.\n\nI MUST use these formats, my job + depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis + is the expected criteria for your final answer: Output\nyou MUST return the + actual complete content as the final answer, not a summary.\n\nThis is the context + you''re working with:\n### Previous attempt failed validation: bad result\n\n\n### + Previous result:\nTest tasks are designed to evaluate specific skills or knowledge + in a controlled environment. The criteria for evaluating success include clarity + of instructions, alignment with learning objectives, and the level of engagement + they elicit from participants. Specifically, a test task should include a clear + and concise prompt that outlines what is expected from the participants. Additionally, + it should have a well-defined scoring rubric that assesses the quality of responses + based on predetermined criteria. By ensuring that the test task is relevant + and challenging, it will effectively measure the participants\u2019 capabilities + and provide valuable insights into their understanding of the subject matter. + Effective preparation, testing methods, and feedback mechanisms are essential + to the success of any test task, while also maintaining an environment conducive + to learning and growth.\n\n\nTry again, making sure to address the validation + error.\n\nBegin! 
This is VERY important to you, use the tools available and + give your best Final Answer, your job depends on it!\n\nThought:"}], "model": + "gpt-4o-mini", "stop": ["\nObservation:"]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '1887' + content-type: + - application/json + cookie: + - __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc; + _cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.12 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jFfbbhtHEn3XVxT4kl2DJGTZlmy9yV4nCBCs5ayxXmAdCMXumpkOe7rG + fSFFBAbyG/t7+yWLqh7OUIoC7AsBTnfX5dSpU92/nQEsnF1cw8J0mE0/+NXbD/njD+fv/ee/X92/ + +9fN839+/Pj57e35hx8ub9+8WSzlBG9+JZOPp9aG+8FTdhzqsomEmcTq86uXry6uLi6fX+pCz5a8 + HGuHvHrJq94Ft7o4v3i5Or9aPX89nu7YGUqLa/j3GQDAb/orcQZL94trOF8ev/SUEra0uJ42ASwi + e/mywJRcyhjyYjkvGg6Zgob+IwTeg8EArdsRILQSNmBIe4oAX8L3LqCHG/1/DZ8oZciYtgkwEqQc + i8klkgVMiVLqKeQEaSDjGmfQ+wNYSq4NZCEz0A59wSx+BozZGTdgyN8lSFvnfVrCNvDek21pCRgs + CKaUKRhHCfYudy4AgoQf2Xuy0ETsac9xu4ZPHSUaY0sUdwR98dkNnmAoceBEaQkuGF+sCy3kjsBZ + ClkDlbIBN5IQhTZ3Sd3vCbeBkp6U/T1hKpEkSdnsCWMQW1yy4V52ySnZOUTeuTQaRSPmceMJGiK7 + QbNdw5fwJXxiqCwBDEBNQyZLEfIR5CW4DC6BhBCyQy8YGg7JWYqQaEcRPWzpAOQ1qnQtZp+v4dmz + dxKdxvOOg3GJ4MdQy+U4pGfPrgUwcCffoC8pw0ariq7tcsNxj9GqEcJ0EO8lWIpCKLuG27mECVLH + 
xVstINA9muwPsO+wxn8/kMlkBYzcUb/UWkolJUjlSOOEIrWl3I60nIJiP+SjafVWvMAlEAtAoHzY + 4wGyuCLvehcwk5TvANhvXFtcPiyBQiqxll327SgeOJDExkGtJewJBmxpLQheCII33rVBa63R/nSs + 9odjkBOKU8GOoVoXSREwHCNpyGpDPM2smewApYwb71InlOZYKXSCrsDhEuAUkeZDqaYju+f2k6Qi + edphyFq6nlD8NcUvAb3nvTgXLxgAjSlRwntEbYVEexI4zl1Z/UnnO1EQW7t6Q1NjW4XvhcD3PrTY + qkGB6Qb25P1qFoPHkAXOwMEfxkxOfG5KBvSJgdTiU8CIfm0I0HSOdmK9i1zaTqOMWLvq2CJLiIR+ + teconBoGP7b/2L07jI4sfC2UVBQajj3mBH9JxXSAaVKVVZXopSQQj4q5FLgGdWnQg6WepcOqg7+u + YQZFY6bQYTAEPWe3qyKkuuO8d6EV6RF8p4QzgZCHUvYHBfqlAP1ZgP3b2EH/MKw8/7lsojNKUJ6Z + 5g+zBEuJvxb0Lh+k4pHSwEGlDiGNVqJaOZZoQ2BpR54HsiPwDzdwyd6FatpElyk6VKaNTh2HxxLs + xWCSAAaKirXgMQmGHkYFXr4cqSnEqRIxuZljzBFDGjDSyH/DfV+CVLnS9ZQ/KiB2p04zQ1uc1fCd + VJEGrJVTrF8J1j/XxjJUlbVD7ym09KdCsCEwGKkpMglNxCZPLTP1aOY/9Pt3ikB0FAzVUTQSVWZI + 5lMGJ0MBo+O0hhtrnU4a7w86OsYghkipgiFmIg/RCQEUekHUHNMY1TEYLhFbSoquElkm71ZqJrEM + kTee+lViv5NvIm1cMvCO4r4j3x+L+6BTBcNLFYZp0N3OEKvh78fpKHDeRidCqPPOyoQabZ4MR7T0 + Val8UqoT4HUGW/qTQdWhXHeMGXssUuISjdCf7qkfPMZREHgYOOYSXJZLiPCntjet4ZZTXtVYDB8n + 6W6e8iLFJhYjk1uYfIIHtJH3MgODPZmpLrQnGi2jssbWeCV/OFLzpFXEgI4UaCL3Ovd6ufFtqUJ+ + pZcBxVBCex92LnI4yvJPKFpSbzc0L0lb7DtnuoeQSz5o5b6aMsmlb8SyYfkvukEhkd6jsKF8GPGj + IEr2dBkaIi8d2nDMR3bTvTA2jckKt9ou1zYYR8A0THvU7vfLibVKUhg4OS3FNG3nhlJY3h5EiDgO + LLypYKfTIfEw8Xk6jQNzGlSUHtD8v7//BwwOuHG+EmaaXnprdZuSK4tqchxaFu9TmJKkDt6j1Eo4 + VerGW6YWQTihev+AOyqjj4J5eKPFSJiUi66X/tBsl6p7eisocpmfoxnweLBOKvWht04PZEsdnOin + C/D69JUhspdQXjqheH+ygCFwriNR3je/jCvfpheN51Y0Jj06umiEed2d5MBBXi8p87DQ1W9nAL/o + y6k8eAwt6h3yLvOW1N2LizfV3mJ+sM2rr168GFczZ/TzwuvLi+UTBu8sZXQ+nTy+FgZNR3Y+Or/U + sFjHJwtnJ2n/MZynbNfUXWj/H/PzgjE0ZLJ3QyTrzMOU522RftWp+/S2CWYNeCFPLGfoLjuKUgpL + DRZfn5mLdEiZ+rvGhZbiEF19azbDXXNlX13ia7wwi7NvZ/8DAAD//wMA87a9+nkPAAA= + headers: + CF-RAY: + - 9340225b9bca0110-GRU + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Mon, 21 Apr 2025 21:57:05 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - 
nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - crewai-iuxna1 + openai-processing-ms: + - '9141' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999564' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_0fc29337116c1d19a0543dfe5b0db291 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/test_task_guardrails.py b/tests/test_task_guardrails.py index e22e76234..fbf11cfc6 100644 --- a/tests/test_task_guardrails.py +++ b/tests/test_task_guardrails.py @@ -1,11 +1,16 @@ -"""Tests for task guardrails functionality.""" - -from unittest.mock import Mock +from unittest.mock import Mock, patch import pytest -from crewai.task import Task +from crewai import Agent, Task +from crewai.llm import LLM +from crewai.tasks.guardrail_task import GuardrailTask from crewai.tasks.task_output import TaskOutput +from crewai.utilities.events import ( + GuardrailTaskCompletedEvent, + GuardrailTaskStartedEvent, +) +from crewai.utilities.events.crewai_event_bus import crewai_event_bus def test_task_without_guardrail(): @@ -22,7 +27,7 @@ def test_task_without_guardrail(): assert result.raw == "test result" -def test_task_with_successful_guardrail(): +def test_task_with_successful_guardrail_func(): """Test that successful guardrail validation passes transformed result.""" def guardrail(result: TaskOutput): @@ -127,3 +132,190 @@ def test_guardrail_error_in_context(): assert "Task failed guardrail validation" in str(exc_info.value) assert "Expected JSON, got string" in str(exc_info.value) + + +@pytest.fixture +def sample_agent(): + return Agent(role="Test Agent", goal="Test Goal", backstory="Test 
Backstory") + + +@pytest.mark.vcr(filter_headers=["authorization"]) +def test_guardrail_using_llm(sample_agent): + task = Task( + description="Test task", + expected_output="Output", + guardrail="Ensure the output is equal to 'good result'", + ) + + with patch( + "crewai.tasks.guardrail_task.GuardrailTask.__call__", + side_effect=[(False, "bad result"), (True, "good result")], + ) as mock_guardrail: + task.execute_sync(agent=sample_agent) + + assert mock_guardrail.call_count == 2 + + task.guardrail = GuardrailTask( + description="Ensure the output is equal to 'good result'", + llm=LLM(model="gpt-4o-mini"), + ) + + with patch( + "crewai.tasks.guardrail_task.GuardrailTask.__call__", + side_effect=[(False, "bad result"), (True, "good result")], + ) as mock_guardrail: + task.execute_sync(agent=sample_agent) + + assert mock_guardrail.call_count == 2 + + +@pytest.fixture +def task_output(): + return TaskOutput( + raw="Test output", + description="Test task", + expected_output="Output", + agent="Test Agent", + ) + + +def test_guardrail_task_initialization_no_llm(task_output): + """Test GuardrailTask initialization fails without LLM""" + with pytest.raises(ValueError, match="Provide a valid LLM to the GuardrailTask"): + GuardrailTask(description="Test")(task_output) + + +@pytest.fixture +def mock_llm(): + llm = Mock(spec=LLM) + llm.call.return_value = """ +output = 'Sample book data' +if isinstance(output, str): + result = (True, output) +else: + result = (False, 'Invalid output format') +print(result) +""" + return llm + + +@pytest.mark.parametrize( + "tool_run_output", + [ + { + "output": "(True, 'Valid output')", + "expected_result": True, + "expected_output": "Valid output", + }, + { + "output": "(False, 'Invalid output format')", + "expected_result": False, + "expected_output": "Invalid output format", + }, + { + "output": "Something went wrong while running the code, Invalid output format", + "expected_result": False, + "expected_output": "Something went wrong while 
running the code, Invalid output format", + }, + { + "output": "No result variable found", + "expected_result": False, + "expected_output": "No result variable found", + }, + { + "output": (False, "Invalid output format"), + "expected_result": False, + "expected_output": "Invalid output format", + }, + ], +) +@patch("crewai_tools.CodeInterpreterTool.run") +def test_guardrail_task_execute_code(mock_run, mock_llm, tool_run_output, task_output): + mock_run.return_value = tool_run_output["output"] + + guardrail = GuardrailTask(description="Test validation", llm=mock_llm) + + result = guardrail(task_output) + assert result[0] == tool_run_output["expected_result"] + assert result[1] == tool_run_output["expected_output"] + + +@patch("crewai_tools.CodeInterpreterTool.run") +def test_guardrail_using_additional_instructions(mock_run, mock_llm, task_output): + mock_run.return_value = "(True, 'Valid output')" + additional_instructions = ( + "This is an additional instruction created by the user follow it strictly" + ) + guardrail = GuardrailTask( + description="Test validation", + llm=mock_llm, + additional_instructions=additional_instructions, + ) + + guardrail(task_output) + + assert additional_instructions in str(mock_llm.call.call_args) + + +@pytest.mark.vcr(filter_headers=["authorization"]) +def test_guardrail_emits_events(sample_agent): + started_guardrail = [] + completed_guardrail = [] + + with crewai_event_bus.scoped_handlers(): + + @crewai_event_bus.on(GuardrailTaskStartedEvent) + def handle_guardrail_started(source, event): + started_guardrail.append( + {"guardrail": event.guardrail, "retry_count": event.retry_count} + ) + + @crewai_event_bus.on(GuardrailTaskCompletedEvent) + def handle_guardrail_completed(source, event): + completed_guardrail.append( + { + "success": event.success, + "result": event.result, + "error": event.error, + "retry_count": event.retry_count, + } + ) + + task = Task( + description="Test task", + expected_output="Output", + guardrail="Ensure 
the output is equal to 'good result'", + ) + + with patch( + "crewai.tasks.guardrail_task.GuardrailTask.__call__", + side_effect=[(False, "bad result"), (True, "good result")], + ): + task.execute_sync(agent=sample_agent) + + expected_started_events = [ + { + "guardrail": "Ensure the output is equal to 'good result'", + "retry_count": 0, + }, + { + "guardrail": "Ensure the output is equal to 'good result'", + "retry_count": 1, + }, + ] + expected_completed_events = [ + { + "success": False, + "result": None, + "error": "bad result", + "retry_count": 0, + }, + { + "success": True, + "result": "good result", + "error": None, + "retry_count": 1, + }, + ] + assert started_guardrail == expected_started_events + assert completed_guardrail == expected_completed_events