feat: support to define a guardrail task no-code

This commit is contained in:
Lucas Gomide
2025-04-21 18:59:56 -03:00
parent 685d20f46c
commit 91b618b4e0
9 changed files with 1307 additions and 15 deletions

View File

@@ -322,6 +322,14 @@ blog_task = Task(
- On success: it returns a tuple of `(bool, Any)`. For example: `(True, validated_result)`
- On Failure: it returns a tuple of `(bool, str)`. For example: `(False, "Error message explaining the failure")`
### GuardrailTask
The `GuardrailTask` class provides a sophisticated way to generate and execute validation code for task outputs. Here's how it works:
#### Code Execution
The generated code can be executed in two ways: in a Docker container (default, recommended) or in the current environment (unsafe mode).
### Error Handling Best Practices
1. **Structured Error Responses**:
@@ -750,6 +758,8 @@ Task guardrails provide a powerful way to validate, transform, or filter task ou
### Basic Usage
#### Define your own logic to validate
```python Code
from typing import Tuple, Union
from crewai import Task
@@ -769,6 +779,34 @@ task = Task(
)
```
#### Leverage a no-code approach for validation
```python Code
from crewai import Task
task = Task(
description="Generate JSON data",
expected_output="Valid JSON object",
guardrail="Ensure the response is a valid JSON object"
)
```
#### Use custom models for code generation
```python Code
from crewai import Task
from crewai.llm import LLM
# GuardrailTask must be imported explicitly; it is defined in crewai.tasks.guardrail_task.
from crewai.tasks.guardrail_task import GuardrailTask

task = Task(
    description="Generate JSON data",
    expected_output="Valid JSON object",
    guardrail=GuardrailTask(
        description="Ensure the response is a valid JSON object",
        llm=LLM(model="gpt-4o-mini"),
    ),
)
```
### How Guardrails Work
1. **Optional Attribute**: Guardrails are an optional attribute at the task level, allowing you to add validation only where needed.

View File

@@ -140,7 +140,7 @@ class Task(BaseModel):
default=None,
)
processed_by_agents: Set[str] = Field(default_factory=set)
guardrail: Optional[Callable[[TaskOutput], Tuple[bool, Any]]] = Field(
guardrail: Optional[Union[Callable[[TaskOutput], Tuple[bool, Any]], str]] = Field(
default=None,
description="Function to validate task output before proceeding to next task",
)
@@ -157,8 +157,12 @@ class Task(BaseModel):
@field_validator("guardrail")
@classmethod
def validate_guardrail_function(cls, v: Optional[Callable]) -> Optional[Callable]:
"""Validate that the guardrail function has the correct signature and behavior.
def validate_guardrail_function(
cls, v: Optional[str | Callable]
) -> Optional[str | Callable]:
"""
If v is a callable, validate that the guardrail function has the correct signature and behavior.
If v is a string, return it as is.
While type hints provide static checking, this validator ensures runtime safety by:
1. Verifying the function accepts exactly one parameter (the TaskOutput)
@@ -171,16 +175,16 @@ class Task(BaseModel):
- Clear error messages help users debug guardrail implementation issues
Args:
v: The guardrail function to validate
v: The guardrail function to validate or a string describing the guardrail task
Returns:
The validated guardrail function
The validated guardrail function or a string describing the guardrail task
Raises:
ValueError: If the function signature is invalid or return annotation
doesn't match Tuple[bool, Any]
"""
if v is not None:
if v is not None and callable(v):
sig = inspect.signature(v)
positional_args = [
param
@@ -408,9 +412,7 @@ class Task(BaseModel):
)
if self.guardrail:
guardrail_result = GuardrailResult.from_tuple(
self.guardrail(task_output)
)
guardrail_result = self._process_guardrail(task_output)
if not guardrail_result.success:
if self.retry_count >= self.max_retries:
raise Exception(
@@ -464,13 +466,52 @@ class Task(BaseModel):
)
)
self._save_file(content)
crewai_event_bus.emit(self, TaskCompletedEvent(output=task_output, task=self))
crewai_event_bus.emit(
self, TaskCompletedEvent(output=task_output, task=self)
)
return task_output
except Exception as e:
self.end_time = datetime.datetime.now()
crewai_event_bus.emit(self, TaskFailedEvent(error=str(e), task=self))
raise e # Re-raise the exception after emitting the event
def _process_guardrail(self, task_output: TaskOutput) -> GuardrailResult:
    """Run the task's guardrail against ``task_output`` and report the outcome.

    A started event is emitted before the guardrail runs and a completed
    event afterwards. A string guardrail is wrapped in a ``GuardrailTask``
    built from that description; any other guardrail is called directly.

    Args:
        task_output: The output produced by this task.

    Returns:
        The ``GuardrailResult`` built from the guardrail's return value.

    Raises:
        ValueError: If no guardrail is configured on the task.
    """
    if self.guardrail is None:
        raise ValueError("Guardrail is not set")

    # Imported inside the method, as in the original implementation.
    from crewai.utilities.events import (
        GuardrailTaskCompletedEvent,
        GuardrailTaskStartedEvent,
    )
    from crewai.utilities.events.crewai_event_bus import crewai_event_bus

    started_event = GuardrailTaskStartedEvent(
        guardrail=self.guardrail, retry_count=self.retry_count
    )
    crewai_event_bus.emit(self, started_event)

    if not isinstance(self.guardrail, str):
        raw_outcome = self.guardrail(task_output)
    else:
        from crewai.tasks.guardrail_task import GuardrailTask

        generated_guardrail = GuardrailTask(description=self.guardrail, task=self)
        raw_outcome = generated_guardrail(task_output)

    outcome = GuardrailResult.from_tuple(raw_outcome)

    completed_event = GuardrailTaskCompletedEvent(
        success=outcome.success,
        result=outcome.result,
        error=outcome.error,
        retry_count=self.retry_count,
    )
    crewai_event_bus.emit(self, completed_event)
    return outcome
def prompt(self) -> str:
"""Prompt the task.

View File

@@ -0,0 +1,154 @@
from typing import Any, Tuple
from crewai.llm import LLM
from crewai.task import Task
from crewai.tasks.task_output import TaskOutput
from crewai.utilities.printer import Printer
class GuardrailTask:
    """Validate another task's output using LLM-generated Python code.

    An LLM is asked to write validation code from a natural-language
    description. The code is then run through ``CodeInterpreterTool`` and its
    output is interpreted as a ``(bool, Any)`` guardrail result.

    Args:
        description (str): The description of the validation criteria.
        task (Task, optional): The task whose output needs validation. When
            ``llm`` is not given, the LLM of this task's agent is used.
        llm (LLM, optional): The language model to use for code generation.
        unsafe_mode (bool): Forwarded to ``CodeInterpreterTool``. Defaults to
            False.
        additional_instructions (str, optional): Additional instructions
            appended to the system prompt.

    Note:
        A missing LLM is not rejected at construction time; ``generate_code``
        raises ``ValueError`` when no LLM is available.
    """

    def __init__(
        self,
        description: str,
        task: Task | None = None,
        llm: LLM | None = None,
        unsafe_mode: bool = False,
        additional_instructions: str = "",
    ):
        self.description = description
        self.unsafe_mode: bool = unsafe_mode

        # Fall back to the LLM of the task's agent, tolerating a missing task,
        # a missing agent, or an agent without an ``llm`` attribute.
        fallback_llm: LLM | None = getattr(
            getattr(task, "agent", None), "llm", None
        )
        self.llm: LLM | None = llm or fallback_llm

        self.additional_instructions = additional_instructions

    @property
    def system_instructions(self) -> str:
        """System instructions for the LLM code generation.

        Returns:
            str: Complete system instructions including security constraints.
        """
        # Each bullet ends with a newline so the rules reach the LLM as
        # separate list items instead of one run-together line.
        security_instructions = (
            "- DO NOT wrap the output in markdown or use triple backticks. Return only raw Python code.\n"
            "- DO NOT use `exec`, `eval`, `compile`, `open`, `os`, `subprocess`, `socket`, `shutil`, or any other system-level modules.\n"
            "- Your code must not perform any file I/O, shell access, or dynamic code execution."
        )
        return (
            "You are an expert Python developer. "
            "You **must strictly** follow the task description, use the provided raw output as the input in your code. "
            "Your code must:\n"
            "- Return results with: print((True, data)) on success, or print((False, 'very detailed error message')) on failure. Make sure the final output is being assigned to the 'result' variable.\n"
            "- Use the literal string of the task output (already included in your input) if needed.\n"
            "- Generate the code **following strictly** the task description.\n"
            "- Be valid Python 3 — executable as-is.\n"
            f"{security_instructions}\n"
            "Additional instructions (do not override the previous instructions):\n"
            f"{self.additional_instructions}"
        )

    def user_instructions(self, task_output: TaskOutput) -> str:
        """Generates user instructions for the LLM code generation.

        Args:
            task_output (TaskOutput): The output to be validated.

        Returns:
            str: Instructions for generating validation code.
        """
        return (
            "Based on the task description below, generate Python 3 code that validates the task output. \n"
            "Task description:\n"
            f"{self.description}\n"
            "Here is the raw output from the task: \n"
            f"'{task_output.raw}' \n"
            "Use this exact string literal inside your generated code (do not reference variables like task_output.raw).\n"
            "Now generate Python code that follows the instructions above."
        )

    def generate_code(self, task_output: TaskOutput) -> str:
        """Generates Python code for validating the task output.

        The generated code is also printed so the user can inspect it.

        Args:
            task_output (TaskOutput): The output to be validated.

        Returns:
            str: Generated Python code for validation.

        Raises:
            ValueError: If neither an explicit LLM nor a task agent LLM is
                available.
        """
        if self.llm is None:
            raise ValueError("Provide a valid LLM to the GuardrailTask")

        response = self.llm.call(
            messages=[
                {
                    "role": "system",
                    "content": self.system_instructions,
                },
                {
                    "role": "user",
                    "content": self.user_instructions(task_output=task_output),
                },
            ]
        )

        printer = Printer()
        printer.print(
            content=f"The following code was generated for the guardrail task:\n{response}\n",
            color="cyan",
        )

        return response

    def __call__(self, task_output: TaskOutput) -> Tuple[bool, Any]:
        """Executes the validation code on the task output.

        Args:
            task_output (TaskOutput): The output to be validated.

        Returns:
            Tuple[bool, Any]: A tuple containing:
                - bool: True if validation passed, False otherwise
                - Any: The validation result or error message
        """
        import ast

        from crewai_tools import CodeInterpreterTool

        code = self.generate_code(task_output)

        result = CodeInterpreterTool(code=code, unsafe_mode=self.unsafe_mode).run()

        error_messages = (
            "Something went wrong while running the code",
            # When running in unsafe mode, the final output should be stored
            # in the result variable.
            "No result variable found",
        )

        # Only textual output can contain an interpreter error message or
        # need parsing; anything else is handed back untouched.
        if isinstance(result, str):
            if any(msg in result for msg in error_messages):
                return False, result

            try:
                result = ast.literal_eval(result.strip())
            except (ValueError, TypeError, SyntaxError) as exc:
                # Output that is not a Python literal is reported as a failed
                # guardrail instead of raising out of the task.
                return (
                    False,
                    f"Could not parse the guardrail code output {result!r}: {exc}",
                )

        return result

View File

@@ -9,6 +9,10 @@ from .crew_events import (
CrewTestCompletedEvent,
CrewTestFailedEvent,
)
from .guardrail_task_events import (
GuardrailTaskCompletedEvent,
GuardrailTaskStartedEvent,
)
from .agent_events import (
AgentExecutionStartedEvent,
AgentExecutionCompletedEvent,

View File

@@ -23,6 +23,10 @@ from .flow_events import (
MethodExecutionFinishedEvent,
MethodExecutionStartedEvent,
)
from .guardrail_task_events import (
GuardrailTaskCompletedEvent,
GuardrailTaskStartedEvent,
)
from .llm_events import (
LLMCallCompletedEvent,
LLMCallFailedEvent,
@@ -68,4 +72,6 @@ EventTypes = Union[
LLMCallCompletedEvent,
LLMCallFailedEvent,
LLMStreamChunkEvent,
GuardrailTaskStartedEvent,
GuardrailTaskCompletedEvent,
]

View File

@@ -0,0 +1,28 @@
from typing import Any, Callable, Optional, Union
from pydantic import BaseModel
from crewai.utilities.events.base_events import BaseEvent
class GuardrailTaskStartedEvent(BaseEvent):
    """Event emitted when a guardrail task starts.

    Attributes:
        type: Event type identifier; defaults to "guardrail_task_started".
        guardrail: The guardrail being run, either a string or a callable.
        retry_count: Retry count supplied by the emitter.
    """

    type: str = "guardrail_task_started"
    guardrail: Union[str, Callable]
    retry_count: int
class GuardrailTaskCompletedEvent(BaseEvent):
    """Event emitted when a guardrail task completes.

    Attributes:
        type: Event type identifier; defaults to "guardrail_task_completed".
        success: Whether the guardrail reported success.
        result: The result value reported by the guardrail.
        error: Optional error message; defaults to None.
        retry_count: Retry count supplied by the emitter.
    """

    type: str = "guardrail_task_completed"
    success: bool
    result: Any
    error: Optional[str] = None
    retry_count: int

View File

@@ -0,0 +1,307 @@
interactions:
- request:
body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
personal goal is: Test Goal\nTo give my best complete final answer to the task
respond using the exact following format:\n\nThought: I now can give a great
answer\nFinal Answer: Your final answer must be the great and the most complete
as possible, it must be outcome described.\n\nI MUST use these formats, my job
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
is the expected criteria for your final answer: Output\nyou MUST return the
actual complete content as the final answer, not a summary.\n\nBegin! This is
VERY important to you, use the tools available and give your best Final Answer,
your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"]}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate, zstd
connection:
- keep-alive
content-length:
- '807'
content-type:
- application/json
cookie:
- __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
_cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.68.2
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.68.2
x-stainless-raw-response:
- 'true'
x-stainless-read-timeout:
- '600.0'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.11.12
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: !!binary |
H4sIAAAAAAAAAwAAAP//jFfNjiPHDb7PUxA6LiRhZ3Z2xpnbOHCQWSNY2J4gQbLGgKpid9NTzWoX
qyRrjQX2IXzJ6+2TBKzu1s9mDrlIrWYVi/zI7yvq9wuABfvFHSxch9n1Q1h9+z7/8MM/dv/8E3H5
fqdvdLh5//3mXx+/+/av/XeLpe2Im1/I5XnX2sV+CJQ5ymh2iTCTeb28vX57dXt1c3VTDX30FGxb
O+TVdVz1LLy6en11vXp9u7r8ZtrdRXakizv49wUAwO/10+IUT78t7uD1cn7Tkyq2tLg7LAJYpBjs
zQJVWTNKXiyPRhclk9TQH0DiDhwKtLwlQGgtbEDRHSWAD/IXFgxwX3/fwb0CwiNphvuWJC/hATrc
EmyIBDLqM3nYce5gSHHLnqUFhAkZgkQ6RFGC3GGGnigr5I6AfhvIZfLgEmdKjNDEBLHkoeQ1PHaU
qImJlmBPwOOmpoQAUyb1zFgyaOl7TPwRrQ5L8JSRA3lAhUS/Fk7k1x/kgzxI9ZEIQw+xgUyaWdol
DJgyuxIwhT1obPIOEx3NSltKGGqg7Owh9kMUkqz2SMCSIwwB9zVuaIp47EkyBsj7gfTkMGBxoXiC
Tclgp0jMELhnQyLHO4vzcg2vXv1dOFfQWdpXr+7gsWOdUWDhzBhg6FAJdiNA4nnLvpyHFxP00Zcw
xdDRMb05RfLAAqwxVPzWUE82i0JHYQASLWkqH6HrKlyzuyHFNmFvOTvbboikRC6HPUQBthh2soRd
x64DZK92msPsOkNjU1oFqrjzWB1PWwpxMPjMuSPVWrwrA+VBMrWpxnmKzX2TKUGZ465F45OlM/ZN
DCHudD2COaLXRFfU4BHYUuJmbwtHkDMlrEnBhvLOmn2LiWPRrxDWvWbqFVD8CNboArP52cMupmfI
saXcUbKmNM/ip6Z8Y3n9VD2cpvReHB3CsDyMKMdTT/hXS7icgjhps20MW1LYYmCPec7qQEsL1ti0
Pz3h0BxDir64PCE1doCOOfEpiXUgxw2Tn5nW1/DMuaWtlu6B6CyAlX2rXUzBsNpyimJbKhTXBsW9
czRktPRf6v79YAwM+1ENqkyNdTyhmJ3OCkVLXekwJYvQpGKzBxK/Kkqpls4FtoBH3rYRQz0kzqiN
JSiKGw6c9xNoY6djfTOzakQfW2TRDJuiLKR6hsoaTnKbY3VRGk69HvrlWAO27ej3VRc9DSHuKyss
iGMFLJUvn/+jIER+pMpjhAadRTxloMfzJkqRAjUNucxbCvvlobFzjEGrNIQqei35io15qCa7NCYJ
q1q1spKVHPvaP1PF4NHW1rrVTYGfCX6iQMKlr+nsaAM4DIEdjqoRE7yrwmPWd7jFczMaccdeT7VA
OJ0JiQbKbHkccrQrSWfJYbE7eUqYHZO4sY6JfHE0gjhEVZ5qHBvoSo8ClFJM6znJev/9DQXbWs2v
U9Ri8qbw7uHHe0vGlv+IHCqWBrhUxbbsckL3XOM0l1VASXUJvTk/vHcW83KKdIgpzxb7jiXbxaOH
6P4cJbMUK+GZRr5Qh3ckzyyTthslRyRngZgaxGBIRWQ+tTL5VPjPdJoHCiy0/Er+UPbgOpSWFHr0
NGu8i542RloDxx1CN1ZXNZtvaxet07Te6rmLKZa2O1a5G3/PrasZW9Ivn/+we+BM/2dxHNHEAwm/
fP7DxX7DMs8vlX7ZpLBlZ8yyZKvEYeCP9YKOQZcQcD+NI9Gu+gr1SNLAdolICx233erXMorETOk1
PORzMZ0QN62tk0CUsH/xLq3jQtA4Md9YPwnryBAbYrZzDXc4ioYWZ7VsSgA22a8zyUQo8aCYWat4
zzJ4Oi8maoqizaxSQjgxoEic3Nik+vNk+XSYTUNshxQ3+tXWRcPC2j0ZHaPYHKo5Dotq/XQB8HOd
gcvZWLsYUuyH/JTjM9XjLt++Gf0tjqP30Xr99nay5pgxHA03l9PofO7waZwV9WSMXjh0Hfnj1uPM
jcVzPDFcnKT9v+G85HtMnaX9f9wfDbVjyT8NiTy785SPyxL9Uu/Zl5cdYK4BL5TSlh09ZaZkpfDU
YAnjH4bFSJenhqWlNCQe/zU0w1Nz69/e4Dd45RYXny7+CwAA//8DAOzQwR9DDQAA
headers:
CF-RAY:
- 93402298d9980110-GRU
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Mon, 21 Apr 2025 21:57:12 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '6385'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999832'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_2a19c29e1e9dd766289937937418044a
status:
code: 200
message: OK
- request:
body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
personal goal is: Test Goal\nTo give my best complete final answer to the task
respond using the exact following format:\n\nThought: I now can give a great
answer\nFinal Answer: Your final answer must be the great and the most complete
as possible, it must be outcome described.\n\nI MUST use these formats, my job
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
is the expected criteria for your final answer: Output\nyou MUST return the
actual complete content as the final answer, not a summary.\n\nThis is the context
you''re working with:\n### Previous attempt failed validation: bad result\n\n\n###
Previous result:\nAs a Test Agent, I have been tasked with providing a complete
response that meets the expected criteria for output. Therefore, here is the
full content without summarization, detailed as required.\n\nIn the realm of
testing, particularly software testing, several critical components come into
play. The fundamental types of testing include but are not limited to:\n\n1.
**Unit Testing**: This is the initial phase where individual components or modules
of the software are tested in isolation. Unit tests help ensure that each part
of the program functions correctly on its own, which aids in catching bugs early
in the development process.\n\n2. **Integration Testing**: After unit testing,
integration testing follows. This phase focuses on verifying the interaction
between various components or systems and ensuring that they work together as
intended.\n\n3. **System Testing**: Once the integrated components have been
tested, system testing involves validating the complete and fully integrated
software product. This ensures that it meets the specified requirements and
works as expected in a real-world environment.\n\n4. **Acceptance Testing**:
This is typically the final phase of testing and is usually carried out by end-users
or clients. The goal is to validate the usability and functionality of the system
against business requirements. Acceptance testing confirms that the software
is ready for deployment and meets the user\u2019s needs.\n\nTo facilitate these
testing processes effectively, various tools are leveraged. These tools can
include:\n\n- **Automated Testing Tools**: Tools like Selenium for web applications
or JUnit for Java applications allow testers to automate repetitive testing
tasks, which increases efficiency and reduces the possibility of human error.\n\n-
**Test Management Tools**: Tools such as JIRA or TestRail are essential for
tracking test progress, managing test cases, and reporting testing outcomes.\n\n-
**Continuous Integration Tools**: Tools like Jenkins help in automating the
process of running tests as part of the development pipeline, ensuring that
any changes made in the codebase are continuously tested.\n\nIn conclusion,
thorough testing through various stages\u2014unit, integration, system, and
acceptance\u2014combined with the strategic use of specialized tools, lays the
foundation for delivering high-quality software. It ensures that the product
not only functions correctly but also meets user expectations, paving the way
for successful implementations and satisfied clients.\n\n\nTry again, making
sure to address the validation error.\n\nBegin! This is VERY important to you,
use the tools available and give your best Final Answer, your job depends on
it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"]}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate, zstd
connection:
- keep-alive
content-length:
- '3539'
content-type:
- application/json
cookie:
- __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
_cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.68.2
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.68.2
x-stainless-raw-response:
- 'true'
x-stainless-read-timeout:
- '600.0'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.11.12
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: !!binary |
H4sIAAAAAAAAA3RXTY/cuBG9768ojA9ZG90Ne3YSB3ObDJxgjMTe2G0ESHxhkyWpdiiWzI9uy/vn
gypKavXYexn0SGSx+F69V6XffwK4Ind1C1e2M9n2g9/+7X3+93+af7rxtw+PN2OPx5tCzc2bb+Px
v+/91UZ28OE3tHnetbPcDx4zcaivbUSTUaK+en3z5+vX13/55Vpf9OzQy7Z2yNsb3vYUaHv98vpm
+/L19tVfp90dk8V0dQv/+wkA4Hf9K3kGh1+vbuHlZn7SY0qmxavbZRHAVWQvT65MSpSyCflqc35p
OWQMmvoDBD6BNQFaOiIYaCVtMCGdMAJ8Dn+nYDzc6f+38BAgdwgRje+BG0jc5JOJCBlTptBuwIAA
EbHDkDTiMEQ2tgNKgClhyGQ8ZAYMqcjGTo4bBk/WCHgJesQMX4rxlEeQ3J2JLoEJDpriG/IeSsII
+HVAm+umHew7hIa95xOFFrhkTwGTZtuU4EyPIRuvyXHAkJOkL2+fXgE8NWhH63ED2A+dSfRNHh9N
JC4JPB7R193TBslMIq2vxz5ByeTpG8rLyKXtuGRdN0S2mNLuc/gcnj17BvtxQA34cU5lXyPLglc7
ePHiU6A8P3zx4hb2HSXI44DrNLKJLeYEFBwdyRXj5ea2gsoReswduwQnyh1VHi07PJiET/lAIazI
oQPGhmOfwKQJcHQ7eMhCp2JLMWUQrCUVhw2GhGBaQyFlOJQ2bQCDOXjJ0Al0PGBMcqA12XaAMXJM
cMCG9fQVIUPkNgqoutybjFHqocW0A0VEbp5AuRsHssb7EUzJ3IvwoCQ5s4mmxxPHxwSeHhHe6s6G
I7w1RyO4vFue7N692W/AzEX0pZB9hCNGaqbiBFeiQt3hfBmpqwtGr4Wwh5CxjXXPire7Ru5Q5txV
MJT/lMDGYidd6HkjdHwCR02DUQ7o2RWPwm3GaGxWEoEDggmcO4w7WB+5VKaAJ/idOpRVUv4HCujW
OhBwIHM7r4gRbfbjBshJOTejRNKDG2NROba56pFSKpqVIjInJxkcMJ8Qgz6TrISPQFmFSglEVqi2
Q+HI/ohw4NxBb4KUrYQ+87jUN9ou0Bc5sFJbVaas/sop9yYoi3e/Pix7OCr87/5xZnztNUrYL0LY
xzFl7L/TWM1zyjGdU1HtqN+jW0cUmRg4dexxB1PMeRMejS8mT6aEwW0zbzE4SHVdGtAulaaySXO1
mXypjAN2RvJZiRKMjZzSYlPJYpCflSjLwdHklA8ZMEj2Wh2LR0y4Bw7b1aNzmQbri5N0JkswweIG
SjIHEqfe6O6EtkTx7QUphraYaEJGPF9E6irifHFxBMUl9cy586MUFOVa7cGhAwxHihxEa0rZjVB2
Zy0OWdJY03bPwRUF5OwJh1Hhlq6hTpiyecSOvcOYNmDOceasHWaMvTaQWTmKf81XGlQlcbXVRsoY
yUy6kBbpRq06h4PnUXOvRdXQCtm5xBIMdGRpUbKnmsCPyZ+aYIKDyACTnPWlUMRe5Szna38MiC7t
4J77Xgpz6sMqV6ES4ZOs+h5F+PnT3f65xnmvzNRK+NHC93f750sbe7O0v/n9XgQqr/cM2DRkCUP2
o8jctNXsFxSqg6LwoTWMedQBQ0RhpjaqehcIsBdI0d1K8K3UwuIWF2er59ZXIk46G62AjKEzwc56
TgOKioKDiJ6mql71110NCanYToT3ET0GKr2GOuHhcoiZ7QsiDpgpyyik7WoDp448rs3rbhgkjrjh
AYU7pzF7PshCMwxnFSZqg3qE4kj9EPmoFygxmMhFJhHqsVaBsbZEY8fdjJJgA/9S8LVtLRjtz8m8
ffhwN9vmB0Ne8ZaRIg0Ykjl41OQ4tibUwWjxt6bhWB0eE8LgTa6jQ47GPuqypaVvlhqQp1YU4NgW
HdIEwOomjbHCgsBo2Xtz4Km/mZ71YCNi7A8q48UurTdqQQqBp1Zt4+kM9qTuFojuOWQKRRz0oofP
SP3B+5/vH55XSjdLgbzF8Eh19LqnaD3eP2zOdbFOghudxMB2JrQTeSuN0OqgKV+xtFhCmPFPdWQT
7msoKXXue8o6r9Vepr3ZInToh3Q5f4cRAp7UeXNkV6yOCE5YwQSBMxwimkfAr1RTPrcI9f6UI5re
U/jRdCQDRuP5tDjFPYsDJeKg5jA10/M3wx+P5BedSwaKzRqdzeTQm7n8n/j6pqKUMKpoDDi9jc0w
Yq7ji+UQaj8dShxYmz849CSOHFroqO22y8fJlKeYbBk8ujqWqZtkaWkt2YvpgJsfzTW1bFQJ/Vmc
iX2ZJgG5zf3DslCH/461ns/fV5XVeWabjMxOv0cYOE/uPA1t08S97k6ffCbJTYY/bGTMm21L9a0F
k37QkaQ+OIixa2dcjQ8XnelQMuBXK23p+y+4Oj6UYRAPEQ1JmTfFT9NBDcclW+5xniIdeWpX8zd4
M9bG3KoX6mQrblUd3V9+W7hi/w8AAP//jJhBbsQgDEX3OUXEASp12s70MiPkgkndMoDALHP3CsIE
0s6i6weOfyKI/+emZkFXR5BZ5cT+VnwGMCXThtnS2v6zba09jYY6oskJiql32doBgHO+CSxW/trI
upt365cQ/Uf6tVUYcpQ+ZURI3hWjntgHUek6zfO1hgT54PtFiP4WWLL/xvq480sLCUTPJgb6eqdc
xo4Onk+XOzlUlBoZyKYhaBCqTBS67+2pBGRNfgDToPtvP49qb9rJLf8p30E99ahliKhJHTX3ZRG/
6jF/vGx/z7VhUS8MhZIJY/kWGg1ku0UqYrtypCG3YAyRtlzFBGku+u0M73BSYlqnHwAAAP//AwDv
DD9WZRIAAA==
headers:
CF-RAY:
- 934022c27c860110-GRU
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Mon, 21 Apr 2025 21:57:22 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '9187'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999158'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_94bb40dead4c4e9c7fa12de3bfb636b7
status:
code: 200
message: OK
version: 1

View File

@@ -0,0 +1,522 @@
interactions:
- request:
body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
personal goal is: Test Goal\nTo give my best complete final answer to the task
respond using the exact following format:\n\nThought: I now can give a great
answer\nFinal Answer: Your final answer must be the great and the most complete
as possible, it must be outcome described.\n\nI MUST use these formats, my job
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
is the expected criteria for your final answer: Output\nyou MUST return the
actual complete content as the final answer, not a summary.\n\nBegin! This is
VERY important to you, use the tools available and give your best Final Answer,
your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"]}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate, zstd
connection:
- keep-alive
content-length:
- '807'
content-type:
- application/json
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.68.2
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.68.2
x-stainless-raw-response:
- 'true'
x-stainless-read-timeout:
- '600.0'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.11.12
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: !!binary |
H4sIAAAAAAAAAwAAAP//jFTBbuQ2DL3PVxA6e4KJm+zszq0ttkBORYuiPbSLASPRNndlSRHpmcwu
8u+FZCcz2ebQi2H78ZGPj6K+rQAMO7MDYwdUOya//ulX/a3tf/+Z3f3jX/xxdGGDfz60p7uvx+mj
aQoj3n8mq8+sKxvH5Ek5hhm2mVCpZL3e3ty22/bdpq3AGB35QuuTrm/ieuTA63bT3qw32/X1+4U9
RLYkZgd/rwAAvtVn0RkcPZodbJrnPyOJYE9m9xIEYHL05Y9BERbFoKY5gzYGpVCl30GIR7AYoOcD
AUJfZAMGOVIG+Cf8wgE9/Fi/d/DHQKAoXwAVBgwOMj1MnEkAoRiQaaAgNVNw4EiRPZUoSTEIgQ6o
MBKpgA4E9JjIKjmwmZUyI3QxQ5w0TXoFdwG6yXfsPYcedGB5LjdS0AZYgQVIhIIyetAINh4oA3oP
mTwdMCiglBpSBaUcD+wIllmVl+pEAxRkynMZ1KqNQxfziGWgpYz1hLkpBMtCTU2H1k4ZlfwJ0Llc
lMx9BSInELv6Ufy6Ks5l6mKmBu7gyN6XNj0HWgi2TDGfqrQYKKg0gK6QSl8YTtBP7Kgw5LkPN8uo
2umsvKsjm10s0nWIOU79UIOP5P06ZSqmlQRH1iFOCnFk1WIAjynmcmKW8QnEDDKNI2b+Ojv0ypzS
GgtgSjmiHebe+gkzBqVyogbuh/XDhJ719N1JEFSWjhfXFkPccixqdrm6PLiZukmwLE+YvL8AMIS4
EMrKfFqQp5cl8bFPOd7Ld1TTcWAZ9plQYigLIRqTqejTCuBTXcbp1X6ZlOOYdK/xC9Vy17c/zPnM
+Q64QK8/LKhGRX8G2m3bvJFwv9h+sc/Goh3Inann5cfJcbwAVhdt/1fOW7nn1jn0/yf9GbCWkpLb
p0yO7euWz2GZPtcFfzvsxeYq2AjlA1vaK1Muo3DU4eTnm8vISZTGfcehp5wyz9dXl/bd1t2+w/fY
WrN6Wv0LAAD//wMAAfXtOswFAAA=
headers:
CF-RAY:
- 934022059c2c0110-GRU
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Mon, 21 Apr 2025 21:56:45 GMT
Server:
- cloudflare
Set-Cookie:
- __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
path=/; expires=Mon, 21-Apr-25 22:26:45 GMT; domain=.api.openai.com; HttpOnly;
Secure; SameSite=None
- _cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000;
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '2377'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999832'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_f39581c88a83855cf77c06098b787948
status:
code: 200
message: OK
- request:
body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
personal goal is: Test Goal\nTo give my best complete final answer to the task
respond using the exact following format:\n\nThought: I now can give a great
answer\nFinal Answer: Your final answer must be the great and the most complete
as possible, it must be outcome described.\n\nI MUST use these formats, my job
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
is the expected criteria for your final answer: Output\nyou MUST return the
actual complete content as the final answer, not a summary.\n\nThis is the context
you''re working with:\n### Previous attempt failed validation: bad result\n\n\n###
Previous result:\nThe task at hand requires a comprehensive and detailed response
that meets the expected criteria for output. In fulfilling this requirement,
it is essential to cover all relevant aspects and provide complete content,
ensuring that the information is clear, concise, and accurately addresses the
needs of the task. Therefore, I will outline the necessary components, adhere
to any guidelines provided, and ensure that the final output is thorough and
well-presented, without omitting important details or summarizing the information.
This approach will guarantee a high-quality response that satisfies the outlined
expectations.\n\n\nTry again, making sure to address the validation error.\n\nBegin!
This is VERY important to you, use the tools available and give your best Final
Answer, your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop":
["\nObservation:"]}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate, zstd
connection:
- keep-alive
content-length:
- '1619'
content-type:
- application/json
cookie:
- __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
_cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.68.2
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.68.2
x-stainless-raw-response:
- 'true'
x-stainless-read-timeout:
- '600.0'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.11.12
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: !!binary |
H4sIAAAAAAAAA4xWTW8cNwy9+1cQczTWi83GjhPf3CQFjKJI27hoizowuBJnhrZGUkVq15sg/72Q
ZvbDaQ69GOshRb1HPpL6cgLQsG2uoDE9qhmiO/vhg/56YR5uPy4uzfvFm5//ePfnb8vPf70y4Z+f
PjSzciKsHsjo7tTchCE6Ug5+NJtEqFSivrg8v1heLl8tLqphCJZcOdZFPTsPZwN7Plsuludni8uz
F6+n031gQ9Jcwd8nAABf6t+C01t6aq5gMdt9GUgEO2qu9k4ATQqufGlQhEXRazM7GE3wSr5CvwEf
NmDQQ8drAoSuwAb0sqEEcOd/ZI8Oruv/V3DbEyjKI6BCj96CJ0MirKgkgKB9SCF3PRSbJUV2ZCGR
xOCFQAOQl5wIarYYvSHYsPagPQE9RTJKFkxipcQIbUgQssas83p1TDxg2sKY+gKYpQSNKazZEvig
8JBFAUHyUF1XWWvwqTzlRyUPiVypTzmuE6sZoLWp8PEdoHPFh9boFVAKNKm00PaUiocGQL/dXW6h
y2zJsSeZw52/87cB0PRMawLtWWb1njY4FzbleEEUPHkV2LBzsCJgb1wuodhX513irmq8F3M4Pb3x
moLNpgjt9PQK3jrC5LZgqWVP9VTMKQYhCO2eWQUesrqdj3DnuWVTKxBaKMDXIy2syBL15KVqotZ+
fueh6GFZQLzbVfbao9sKS0Hy/gmHEp7Q9FPC9hBCZLMXh9vOCtOQYkio5coWjcoMLCrOKtR94g0K
gWi2TAIhAT1hqWOtuuQYQ9JaKSkKHEoy5/C+AvCdIxiKGFZVWi6ksdo7sSC02TnI3lIqLWILkgmw
5KowGFC1cr/zLwvxtw4T67Zi/CWRYZnK8LsQmFKKaorVRODQdxm7kvLKPExiPG4IE/yatsC+DWnA
Ulagth3l7bZzuF4HtoDDiru8u3pqIu1Rx3THwF5LM2BSNnlUNntA2OB29GMBQtmOV071tZXZeWUW
ivZ2dD7W7uHPo1geaQst1wSNLcBeuOtVwCbceGhTGKrjvuPHfI9sjjUwg0SVqakN1BPwUGq4k2H9
cpSJmEjIFzL1WpWdMgwdN26lcVFoXFvL5SQ6+I0k5GSoivOmBYzRscGVo9mu0SBRS4m8GcXl2D9W
ZbU5aU8JEmFVxYB1HjkZU1nG5V5FhwvrZHnSEmlKELCfUI5CErAsJovQmPnbURZ1PIBoykZzUSnG
mAKafgY342zoMib0SkW1B/EULAORSr1iam87zdGaQIFNz44mtGN3Jy6Bd6Ua+60PjkXZwJpp83xw
jIN+Djc6QimNXSeHc1vAKs6YaM0hC0TWFp0TkFxnAKyxy0dtMGrn21FcB2Oi1XbU9YhSUFnKXAip
ig/dbhMcb7JEbRYs29Rn544M6H2YUlB26KfJ8nW/NV3oYgor+eZo07Jn6e8ToQRfNqRoiE21fj0B
+FS3c362cJuYwhD1XsMj1euWb5ZjvObwKDhYXy5eT1YNiu5geLWYlvrzgPdjqeRowTcGTU/2cPTw
GsBsORwZTo5o/xfO92KP1Nl3/yf8wWAMRSV7HxNZNs8pH9wSPdQd/323fZor4EYordnQvTKlUgpL
LWY3PmUa2YrScN+y7yjFxON7po337aW9eIWvcWmak68n/wIAAP//AwA0/RJL3QkAAA==
headers:
CF-RAY:
- 93402216690b0110-GRU
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Mon, 21 Apr 2025 21:56:49 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '4451'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999631'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_8383a16d5f5b7f53d659bebf481ba936
status:
code: 200
message: OK
- request:
body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
personal goal is: Test Goal\nTo give my best complete final answer to the task
respond using the exact following format:\n\nThought: I now can give a great
answer\nFinal Answer: Your final answer must be the great and the most complete
as possible, it must be outcome described.\n\nI MUST use these formats, my job
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
is the expected criteria for your final answer: Output\nyou MUST return the
actual complete content as the final answer, not a summary.\n\nBegin! This is
VERY important to you, use the tools available and give your best Final Answer,
your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"]}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate, zstd
connection:
- keep-alive
content-length:
- '807'
content-type:
- application/json
cookie:
- __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
_cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.68.2
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.68.2
x-stainless-raw-response:
- 'true'
x-stainless-read-timeout:
- '600.0'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.11.12
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: !!binary |
H4sIAAAAAAAAA4xU227bRhB911cM+CwJtnyN3xygBoI+xG1doGgTCKPdITnRcpbdGUoRAgP9jf5e
v6TYpWTJjR8KEATIs3PmdvZ8mwBU7Ks7qFyL5ro+zN5/tJ/urz7cPjziw68ff3y6vXz8vb1/fLyw
n29/q6Y5Iq6+kLND1NzFrg9kHGWEXSI0yqznN5dXi5vF9flZAbroKeSwprfZZZx1LDxbnC0uZ2c3
s/PbfXQb2ZFWd/DHBADgW3nnOsXT1+oOClf505EqNlTdvRwCqFIM+U+FqqyGYtX0CLooRlJK/wAS
t+BQoOENAUKTywYU3VIC+CQPLBjgvnzfwROpgaGuFTAReFJuhDxYBNpgGNAItCfHNTvQNYegEBOs
JW4D+YaABRBy/hRDIA8kG05ROhKbw1NL4BIbJUaoYzpwsjSgg3OkCiwuDJ7ABUxsO4g1sKilweXR
6xQwcFP4YMvWQiBMkgnGdfGG8hnxYC1BoA2FTEHSYEMlylraAQV2bFCn2EGPydhxj2I6h1/23WEI
uykg2GEioG0cgn8pEMHl3CWXi+JYCfoUuz6nQIM4WGAhhW3+YgX62pMz8mPWXN7rzPfec+5xzMx2
SNhiWdyWQph5qjnvQ11Muek0rBK7MSGqUn4K9Z8Dhv34EmkfJQMrVPIQBfpEnoxSV8gOK5nD+x2Q
6FCoC2emOk6AFRIF2qDY2HaLIZA0LE0peMshANX1uIewg45Qh0TfNfvPX3+Dwx5XHNiYtLD1KW7Y
ExRNrELWknLTWtaExczBCQbxlLLgfdl5Xah1KLuHDs0ozeGHQwm50R4T5rFOSyM5qiNro9/LpCby
K3Rr6Mi1KKzdKP08SjHGAGPuF4HGGlB2x6lMYdtyIMCgETpkMeQiSJRT9WeN+MHloiweVZtLaFLc
Wjs/vcGJ6kExu4gMIZwAKBKt9FO84/MeeX5xixCbPsWV/ie0qllY22Ui1CjZGdRiXxX0eQLwubjS
8MpoqlHOS4trKunOry5Gvupohqfo9R61aBiOwMXZu+kbhEtPhhz0xNgqh64lfww9uiAOnuMJMDlp
+/ty3uIeW2dp/g/9EXCOeiO/zHeG3euWj8cSfSmX++1jL2MuBVdKacOOlsaU8io81TiE0cIr3alR
t6xZGkp94tHH635Z3/ira7zFhasmz5N/AQAA//8DAAhvMU7VBgAA
headers:
CF-RAY:
- 93402233baf00110-GRU
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Mon, 21 Apr 2025 21:56:56 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '6058'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999832'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_f5273114a4a797fd0928674edb442194
status:
code: 200
message: OK
- request:
body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
personal goal is: Test Goal\nTo give my best complete final answer to the task
respond using the exact following format:\n\nThought: I now can give a great
answer\nFinal Answer: Your final answer must be the great and the most complete
as possible, it must be outcome described.\n\nI MUST use these formats, my job
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
is the expected criteria for your final answer: Output\nyou MUST return the
actual complete content as the final answer, not a summary.\n\nThis is the context
you''re working with:\n### Previous attempt failed validation: bad result\n\n\n###
Previous result:\nTest tasks are designed to evaluate specific skills or knowledge
in a controlled environment. The criteria for evaluating success include clarity
of instructions, alignment with learning objectives, and the level of engagement
they elicit from participants. Specifically, a test task should include a clear
and concise prompt that outlines what is expected from the participants. Additionally,
it should have a well-defined scoring rubric that assesses the quality of responses
based on predetermined criteria. By ensuring that the test task is relevant
and challenging, it will effectively measure the participants\u2019 capabilities
and provide valuable insights into their understanding of the subject matter.
Effective preparation, testing methods, and feedback mechanisms are essential
to the success of any test task, while also maintaining an environment conducive
to learning and growth.\n\n\nTry again, making sure to address the validation
error.\n\nBegin! This is VERY important to you, use the tools available and
give your best Final Answer, your job depends on it!\n\nThought:"}], "model":
"gpt-4o-mini", "stop": ["\nObservation:"]}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate, zstd
connection:
- keep-alive
content-length:
- '1887'
content-type:
- application/json
cookie:
- __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
_cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.68.2
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.68.2
x-stainless-raw-response:
- 'true'
x-stainless-read-timeout:
- '600.0'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.11.12
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: !!binary |
H4sIAAAAAAAAAwAAAP//jFfbbhtHEn3XVxT4kl2DJGTZlmy9yV4nCBCs5ayxXmAdCMXumpkOe7rG
fSFFBAbyG/t7+yWLqh7OUIoC7AsBTnfX5dSpU92/nQEsnF1cw8J0mE0/+NXbD/njD+fv/ee/X92/
+9fN839+/Pj57e35hx8ub9+8WSzlBG9+JZOPp9aG+8FTdhzqsomEmcTq86uXry6uLi6fX+pCz5a8
HGuHvHrJq94Ft7o4v3i5Or9aPX89nu7YGUqLa/j3GQDAb/orcQZL94trOF8ev/SUEra0uJ42ASwi
e/mywJRcyhjyYjkvGg6Zgob+IwTeg8EArdsRILQSNmBIe4oAX8L3LqCHG/1/DZ8oZciYtgkwEqQc
i8klkgVMiVLqKeQEaSDjGmfQ+wNYSq4NZCEz0A59wSx+BozZGTdgyN8lSFvnfVrCNvDek21pCRgs
CKaUKRhHCfYudy4AgoQf2Xuy0ETsac9xu4ZPHSUaY0sUdwR98dkNnmAoceBEaQkuGF+sCy3kjsBZ
ClkDlbIBN5IQhTZ3Sd3vCbeBkp6U/T1hKpEkSdnsCWMQW1yy4V52ySnZOUTeuTQaRSPmceMJGiK7
QbNdw5fwJXxiqCwBDEBNQyZLEfIR5CW4DC6BhBCyQy8YGg7JWYqQaEcRPWzpAOQ1qnQtZp+v4dmz
dxKdxvOOg3GJ4MdQy+U4pGfPrgUwcCffoC8pw0ariq7tcsNxj9GqEcJ0EO8lWIpCKLuG27mECVLH
xVstINA9muwPsO+wxn8/kMlkBYzcUb/UWkolJUjlSOOEIrWl3I60nIJiP+SjafVWvMAlEAtAoHzY
4wGyuCLvehcwk5TvANhvXFtcPiyBQiqxll327SgeOJDExkGtJewJBmxpLQheCII33rVBa63R/nSs
9odjkBOKU8GOoVoXSREwHCNpyGpDPM2smewApYwb71InlOZYKXSCrsDhEuAUkeZDqaYju+f2k6Qi
edphyFq6nlD8NcUvAb3nvTgXLxgAjSlRwntEbYVEexI4zl1Z/UnnO1EQW7t6Q1NjW4XvhcD3PrTY
qkGB6Qb25P1qFoPHkAXOwMEfxkxOfG5KBvSJgdTiU8CIfm0I0HSOdmK9i1zaTqOMWLvq2CJLiIR+
teconBoGP7b/2L07jI4sfC2UVBQajj3mBH9JxXSAaVKVVZXopSQQj4q5FLgGdWnQg6WepcOqg7+u
YQZFY6bQYTAEPWe3qyKkuuO8d6EV6RF8p4QzgZCHUvYHBfqlAP1ZgP3b2EH/MKw8/7lsojNKUJ6Z
5g+zBEuJvxb0Lh+k4pHSwEGlDiGNVqJaOZZoQ2BpR54HsiPwDzdwyd6FatpElyk6VKaNTh2HxxLs
xWCSAAaKirXgMQmGHkYFXr4cqSnEqRIxuZljzBFDGjDSyH/DfV+CVLnS9ZQ/KiB2p04zQ1uc1fCd
VJEGrJVTrF8J1j/XxjJUlbVD7ym09KdCsCEwGKkpMglNxCZPLTP1aOY/9Pt3ikB0FAzVUTQSVWZI
5lMGJ0MBo+O0hhtrnU4a7w86OsYghkipgiFmIg/RCQEUekHUHNMY1TEYLhFbSoquElkm71ZqJrEM
kTee+lViv5NvIm1cMvCO4r4j3x+L+6BTBcNLFYZp0N3OEKvh78fpKHDeRidCqPPOyoQabZ4MR7T0
Val8UqoT4HUGW/qTQdWhXHeMGXssUuISjdCf7qkfPMZREHgYOOYSXJZLiPCntjet4ZZTXtVYDB8n
6W6e8iLFJhYjk1uYfIIHtJH3MgODPZmpLrQnGi2jssbWeCV/OFLzpFXEgI4UaCL3Ovd6ufFtqUJ+
pZcBxVBCex92LnI4yvJPKFpSbzc0L0lb7DtnuoeQSz5o5b6aMsmlb8SyYfkvukEhkd6jsKF8GPGj
IEr2dBkaIi8d2nDMR3bTvTA2jckKt9ou1zYYR8A0THvU7vfLibVKUhg4OS3FNG3nhlJY3h5EiDgO
LLypYKfTIfEw8Xk6jQNzGlSUHtD8v7//BwwOuHG+EmaaXnprdZuSK4tqchxaFu9TmJKkDt6j1Eo4
VerGW6YWQTihev+AOyqjj4J5eKPFSJiUi66X/tBsl6p7eisocpmfoxnweLBOKvWht04PZEsdnOin
C/D69JUhspdQXjqheH+ygCFwriNR3je/jCvfpheN51Y0Jj06umiEed2d5MBBXi8p87DQ1W9nAL/o
y6k8eAwt6h3yLvOW1N2LizfV3mJ+sM2rr168GFczZ/TzwuvLi+UTBu8sZXQ+nTy+FgZNR3Y+Or/U
sFjHJwtnJ2n/MZynbNfUXWj/H/PzgjE0ZLJ3QyTrzMOU522RftWp+/S2CWYNeCFPLGfoLjuKUgpL
DRZfn5mLdEiZ+rvGhZbiEF19azbDXXNlX13ia7wwi7NvZ/8DAAD//wMA87a9+nkPAAA=
headers:
CF-RAY:
- 9340225b9bca0110-GRU
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Mon, 21 Apr 2025 21:57:05 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '9141'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999564'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_0fc29337116c1d19a0543dfe5b0db291
status:
code: 200
message: OK
version: 1

View File

@@ -1,11 +1,16 @@
"""Tests for task guardrails functionality."""
from unittest.mock import Mock
from unittest.mock import Mock, patch
import pytest
from crewai.task import Task
from crewai import Agent, Task
from crewai.llm import LLM
from crewai.tasks.guardrail_task import GuardrailTask
from crewai.tasks.task_output import TaskOutput
from crewai.utilities.events import (
GuardrailTaskCompletedEvent,
GuardrailTaskStartedEvent,
)
from crewai.utilities.events.crewai_event_bus import crewai_event_bus
def test_task_without_guardrail():
@@ -22,7 +27,7 @@ def test_task_without_guardrail():
assert result.raw == "test result"
def test_task_with_successful_guardrail():
def test_task_with_successful_guardrail_func():
"""Test that successful guardrail validation passes transformed result."""
def guardrail(result: TaskOutput):
@@ -127,3 +132,190 @@ def test_guardrail_error_in_context():
assert "Task failed guardrail validation" in str(exc_info.value)
assert "Expected JSON, got string" in str(exc_info.value)
@pytest.fixture
def sample_agent():
    """Provide the minimal Agent used by the guardrail tests in this module."""
    agent_settings = {
        "role": "Test Agent",
        "goal": "Test Goal",
        "backstory": "Test Backstory",
    }
    return Agent(**agent_settings)
@pytest.mark.vcr(filter_headers=["authorization"])
def test_guardrail_using_llm(sample_agent):
    """Execute a task guarded by a string, then by an explicit GuardrailTask.

    ``GuardrailTask.__call__`` is patched to return a failing tuple followed by
    a passing one, and each execution is expected to call it exactly twice.
    """
    guardrail_text = "Ensure the output is equal to 'good result'"
    call_target = "crewai.tasks.guardrail_task.GuardrailTask.__call__"

    # First pass: the guardrail is given as a plain description string.
    task = Task(
        description="Test task",
        expected_output="Output",
        guardrail=guardrail_text,
    )
    with patch(
        call_target,
        side_effect=[(False, "bad result"), (True, "good result")],
    ) as string_guardrail_mock:
        task.execute_sync(agent=sample_agent)
    assert string_guardrail_mock.call_count == 2

    # Second pass: the same task, now with an explicit GuardrailTask and LLM.
    task.guardrail = GuardrailTask(
        description=guardrail_text,
        llm=LLM(model="gpt-4o-mini"),
    )
    with patch(
        call_target,
        side_effect=[(False, "bad result"), (True, "good result")],
    ) as object_guardrail_mock:
        task.execute_sync(agent=sample_agent)
    assert object_guardrail_mock.call_count == 2
@pytest.fixture
def task_output():
    """Provide a fixed TaskOutput for tests that call a guardrail directly."""
    output_fields = {
        "raw": "Test output",
        "description": "Test task",
        "expected_output": "Output",
        "agent": "Test Agent",
    }
    return TaskOutput(**output_fields)
def test_guardrail_task_initialization_no_llm(task_output):
    """Building and invoking a GuardrailTask without an LLM raises ValueError."""
    expected_message = "Provide a valid LLM to the GuardrailTask"
    with pytest.raises(ValueError, match=expected_message):
        # Both steps stay inside the context so the error is caught whichever
        # of them raises it.
        guardrail_without_llm = GuardrailTask(description="Test")
        guardrail_without_llm(task_output)
@pytest.fixture
def mock_llm():
    """Provide an LLM mock whose ``call`` returns a canned validation snippet."""
    # NOTE(review): the snippet text is reproduced exactly as it appears in this
    # view; confirm its inner indentation against the original file.
    canned_snippet = """
output = 'Sample book data'
if isinstance(output, str):
result = (True, output)
else:
result = (False, 'Invalid output format')
print(result)
"""
    fake_llm = Mock(spec=LLM)
    fake_llm.call.return_value = canned_snippet
    return fake_llm
@pytest.mark.parametrize(
    "tool_run_output",
    [
        # Tuple-looking string reporting success.
        {
            "output": "(True, 'Valid output')",
            "expected_result": True,
            "expected_output": "Valid output",
        },
        # Tuple-looking string reporting failure.
        {
            "output": "(False, 'Invalid output format')",
            "expected_result": False,
            "expected_output": "Invalid output format",
        },
        # Free-form error text is expected back unchanged as a failure.
        {
            "output": "Something went wrong while running the code, Invalid output format",
            "expected_result": False,
            "expected_output": "Something went wrong while running the code, Invalid output format",
        },
        {
            "output": "No result variable found",
            "expected_result": False,
            "expected_output": "No result variable found",
        },
        # An actual tuple rather than its string form.
        {
            "output": (False, "Invalid output format"),
            "expected_result": False,
            "expected_output": "Invalid output format",
        },
    ],
)
@patch("crewai_tools.CodeInterpreterTool.run")
def test_guardrail_task_execute_code(mock_run, mock_llm, tool_run_output, task_output):
    """Check the (success, payload) pair GuardrailTask returns for each tool output.

    ``CodeInterpreterTool.run`` is patched, so only the handling of its return
    value is exercised here.
    """
    mock_run.return_value = tool_run_output["output"]
    expected_flag = tool_run_output["expected_result"]
    expected_payload = tool_run_output["expected_output"]

    validator = GuardrailTask(description="Test validation", llm=mock_llm)
    verdict = validator(task_output)

    assert verdict[0] == expected_flag
    assert verdict[1] == expected_payload
@patch("crewai_tools.CodeInterpreterTool.run")
def test_guardrail_using_additional_instructions(mock_run, mock_llm, task_output):
    """User-supplied additional instructions must appear in the LLM call arguments."""
    extra_guidance = (
        "This is an additional instruction created by the user follow it strictly"
    )
    mock_run.return_value = "(True, 'Valid output')"

    validator = GuardrailTask(
        description="Test validation",
        llm=mock_llm,
        additional_instructions=extra_guidance,
    )
    validator(task_output)

    # Inspect the string form of the last recorded ``llm.call`` invocation.
    recorded_call = str(mock_llm.call.call_args)
    assert extra_guidance in recorded_call
@pytest.mark.vcr(filter_headers=["authorization"])
def test_guardrail_emits_events(sample_agent):
    """Check the started/completed guardrail events emitted across a retry.

    ``GuardrailTask.__call__`` is patched to fail once and then pass, so one
    started and one completed event are expected for retry counts 0 and 1.
    """
    guardrail_text = "Ensure the output is equal to 'good result'"
    completed_fields = ("success", "result", "error", "retry_count")

    started_guardrail = []
    completed_guardrail = []

    expected_started_events = [
        {"guardrail": guardrail_text, "retry_count": attempt} for attempt in (0, 1)
    ]
    expected_completed_events = [
        {
            "success": False,
            "result": None,
            "error": "bad result",
            "retry_count": 0,
        },
        {
            "success": True,
            "result": "good result",
            "error": None,
            "retry_count": 1,
        },
    ]

    with crewai_event_bus.scoped_handlers():

        @crewai_event_bus.on(GuardrailTaskStartedEvent)
        def record_started(source, event):
            started_guardrail.append(
                {"guardrail": event.guardrail, "retry_count": event.retry_count}
            )

        @crewai_event_bus.on(GuardrailTaskCompletedEvent)
        def record_completed(source, event):
            completed_guardrail.append(
                {field: getattr(event, field) for field in completed_fields}
            )

        task = Task(
            description="Test task",
            expected_output="Output",
            guardrail=guardrail_text,
        )

        # The task runs while the scoped handlers above are registered.
        with patch(
            "crewai.tasks.guardrail_task.GuardrailTask.__call__",
            side_effect=[(False, "bad result"), (True, "good result")],
        ):
            task.execute_sync(agent=sample_agent)

        assert started_guardrail == expected_started_events
        assert completed_guardrail == expected_completed_events