mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-09 08:08:32 +00:00
feat: support to define a guardrail task no-code
This commit is contained in:
@@ -322,6 +322,14 @@ blog_task = Task(
|
||||
- On success: it returns a tuple of `(bool, Any)`. For example: `(True, validated_result)`
|
||||
- On Failure: it returns a tuple of `(bool, str)`. For example: `(False, "Error message explaining the failure")`
|
||||
|
||||
### GuardrailTask
|
||||
|
||||
The `GuardrailTask` class provides a sophisticated way to generate and execute validation code for task outputs. Here's how it works:
|
||||
|
||||
#### Code Execution
|
||||
|
||||
The generated code can be executed in two ways: in a Docker container (default, recommended) or in the current environment (unsafe mode).
|
||||
|
||||
### Error Handling Best Practices
|
||||
|
||||
1. **Structured Error Responses**:
|
||||
@@ -750,6 +758,8 @@ Task guardrails provide a powerful way to validate, transform, or filter task ou
|
||||
|
||||
### Basic Usage
|
||||
|
||||
#### Define your own logic to validate
|
||||
|
||||
```python Code
|
||||
from typing import Tuple, Union
|
||||
from crewai import Task
|
||||
@@ -769,6 +779,34 @@ task = Task(
|
||||
)
|
||||
```
|
||||
|
||||
#### Leverage a no-code approach for validation
|
||||
|
||||
```python Code
|
||||
from crewai import Task
|
||||
|
||||
task = Task(
|
||||
description="Generate JSON data",
|
||||
expected_output="Valid JSON object",
|
||||
guardrail="Ensure the response is a valid JSON object"
|
||||
)
|
||||
```
|
||||
|
||||
#### Use custom models for code generation
|
||||
|
||||
```python Code
|
||||
from crewai import Task
|
||||
from crewai.llm import LLM
from crewai.tasks.guardrail_task import GuardrailTask
|
||||
|
||||
task = Task(
|
||||
description="Generate JSON data",
|
||||
expected_output="Valid JSON object",
|
||||
guardrail=GuardrailTask(
|
||||
description="Ensure the response is a valid JSON object",
|
||||
llm=LLM(model="gpt-4o-mini"),
|
||||
)
|
||||
)
|
||||
```
|
||||
|
||||
### How Guardrails Work
|
||||
|
||||
1. **Optional Attribute**: Guardrails are an optional attribute at the task level, allowing you to add validation only where needed.
|
||||
|
||||
@@ -140,7 +140,7 @@ class Task(BaseModel):
|
||||
default=None,
|
||||
)
|
||||
processed_by_agents: Set[str] = Field(default_factory=set)
|
||||
guardrail: Optional[Callable[[TaskOutput], Tuple[bool, Any]]] = Field(
|
||||
guardrail: Optional[Union[Callable[[TaskOutput], Tuple[bool, Any]], str]] = Field(
|
||||
default=None,
|
||||
description="Function to validate task output before proceeding to next task",
|
||||
)
|
||||
@@ -157,8 +157,12 @@ class Task(BaseModel):
|
||||
|
||||
@field_validator("guardrail")
|
||||
@classmethod
|
||||
def validate_guardrail_function(cls, v: Optional[Callable]) -> Optional[Callable]:
|
||||
"""Validate that the guardrail function has the correct signature and behavior.
|
||||
def validate_guardrail_function(
|
||||
cls, v: Optional[str | Callable]
|
||||
) -> Optional[str | Callable]:
|
||||
"""
|
||||
If v is a callable, validate that the guardrail function has the correct signature and behavior.
|
||||
If v is a string, return it as is.
|
||||
|
||||
While type hints provide static checking, this validator ensures runtime safety by:
|
||||
1. Verifying the function accepts exactly one parameter (the TaskOutput)
|
||||
@@ -171,16 +175,16 @@ class Task(BaseModel):
|
||||
- Clear error messages help users debug guardrail implementation issues
|
||||
|
||||
Args:
|
||||
v: The guardrail function to validate
|
||||
v: The guardrail function to validate or a string describing the guardrail task
|
||||
|
||||
Returns:
|
||||
The validated guardrail function
|
||||
The validated guardrail function or a string describing the guardrail task
|
||||
|
||||
Raises:
|
||||
ValueError: If the function signature is invalid or return annotation
|
||||
doesn't match Tuple[bool, Any]
|
||||
"""
|
||||
if v is not None:
|
||||
if v is not None and callable(v):
|
||||
sig = inspect.signature(v)
|
||||
positional_args = [
|
||||
param
|
||||
@@ -408,9 +412,7 @@ class Task(BaseModel):
|
||||
)
|
||||
|
||||
if self.guardrail:
|
||||
guardrail_result = GuardrailResult.from_tuple(
|
||||
self.guardrail(task_output)
|
||||
)
|
||||
guardrail_result = self._process_guardrail(task_output)
|
||||
if not guardrail_result.success:
|
||||
if self.retry_count >= self.max_retries:
|
||||
raise Exception(
|
||||
@@ -464,13 +466,52 @@ class Task(BaseModel):
|
||||
)
|
||||
)
|
||||
self._save_file(content)
|
||||
crewai_event_bus.emit(self, TaskCompletedEvent(output=task_output, task=self))
|
||||
crewai_event_bus.emit(
|
||||
self, TaskCompletedEvent(output=task_output, task=self)
|
||||
)
|
||||
return task_output
|
||||
except Exception as e:
|
||||
self.end_time = datetime.datetime.now()
|
||||
crewai_event_bus.emit(self, TaskFailedEvent(error=str(e), task=self))
|
||||
raise e # Re-raise the exception after emitting the event
|
||||
|
||||
def _process_guardrail(self, task_output: TaskOutput) -> GuardrailResult:
    """Run the configured guardrail on ``task_output`` and announce it on the event bus.

    A ``GuardrailTaskStartedEvent`` is emitted before the guardrail runs and a
    ``GuardrailTaskCompletedEvent`` after its result has been converted into a
    ``GuardrailResult``.

    Args:
        task_output: The task output handed to the guardrail.

    Returns:
        The ``GuardrailResult`` built from the guardrail's returned tuple.

    Raises:
        ValueError: If no guardrail is set on this task.
    """
    if self.guardrail is None:
        raise ValueError("Guardrail is not set")

    from crewai.utilities.events import (
        GuardrailTaskCompletedEvent,
        GuardrailTaskStartedEvent,
    )
    from crewai.utilities.events.crewai_event_bus import crewai_event_bus

    started_event = GuardrailTaskStartedEvent(
        guardrail=self.guardrail,
        retry_count=self.retry_count,
    )
    crewai_event_bus.emit(self, started_event)

    if not isinstance(self.guardrail, str):
        # A callable guardrail is invoked directly with the task output.
        raw_outcome = self.guardrail(task_output)
    else:
        # A string guardrail is a description; GuardrailTask turns it into a
        # callable validator bound to this task.
        from crewai.tasks.guardrail_task import GuardrailTask

        validator = GuardrailTask(description=self.guardrail, task=self)
        raw_outcome = validator(task_output)

    outcome = GuardrailResult.from_tuple(raw_outcome)

    completed_event = GuardrailTaskCompletedEvent(
        success=outcome.success,
        result=outcome.result,
        error=outcome.error,
        retry_count=self.retry_count,
    )
    crewai_event_bus.emit(self, completed_event)
    return outcome
|
||||
|
||||
def prompt(self) -> str:
|
||||
"""Prompt the task.
|
||||
|
||||
|
||||
154
src/crewai/tasks/guardrail_task.py
Normal file
154
src/crewai/tasks/guardrail_task.py
Normal file
@@ -0,0 +1,154 @@
|
||||
from typing import Any, Tuple
|
||||
|
||||
from crewai.llm import LLM
|
||||
from crewai.task import Task
|
||||
from crewai.tasks.task_output import TaskOutput
|
||||
from crewai.utilities.printer import Printer
|
||||
|
||||
|
||||
class GuardrailTask:
    """Validate another task's output by running LLM-generated Python code.

    An LLM is asked to write Python code that checks the task output against
    ``description``; the code is then executed with ``CodeInterpreterTool`` and
    its outcome is returned as a ``(bool, Any)`` guardrail tuple.

    Args:
        description (str): The description of the validation criteria.
        task (Task, optional): The task whose output needs validation. When no
            ``llm`` is given, the LLM of this task's agent is used as fallback.
        llm (LLM, optional): The language model to use for code generation.
        unsafe_mode (bool, optional): Forwarded to ``CodeInterpreterTool`` as
            ``unsafe_mode``. Defaults to False.
        additional_instructions (str, optional): Additional instructions
            appended to the system prompt.

    Note:
        A missing LLM is not rejected by the constructor; ``generate_code``
        raises ``ValueError`` when it is called without one.
    """

    # Substrings of the interpreter output that mark a failed execution.
    _EXECUTION_ERROR_MARKERS = (
        "Something went wrong while running the code",
        # When running in unsafe mode, the final output should be stored in
        # the `result` variable.
        "No result variable found",
    )

    def __init__(
        self,
        description: str,
        task: Task | None = None,
        llm: LLM | None = None,
        unsafe_mode: bool = False,
        additional_instructions: str = "",
    ):
        self.description = description
        self.unsafe_mode: bool = unsafe_mode

        # Fall back to the LLM of the task's agent; any missing link in the
        # task -> agent -> llm chain yields None.
        fallback_llm: LLM | None = getattr(
            getattr(task, "agent", None), "llm", None
        )
        self.llm: LLM | None = llm or fallback_llm

        self.additional_instructions = additional_instructions

    @property
    def system_instructions(self) -> str:
        """System instructions for the LLM code generation.

        Returns:
            str: Complete system instructions including security constraints.
        """
        # Every bullet ends with "\n" so the items stay on separate lines in
        # the final prompt.
        security_instructions = (
            "- DO NOT wrap the output in markdown or use triple backticks. Return only raw Python code.\n"
            "- DO NOT use `exec`, `eval`, `compile`, `open`, `os`, `subprocess`, `socket`, `shutil`, or any other system-level modules.\n"
            "- Your code must not perform any file I/O, shell access, or dynamic code execution."
        )
        return (
            "You are an expert Python developer. "
            "You **must strictly** follow the task description, use the provided raw output as the input in your code. "
            "Your code must:\n"
            "- Return results with: print((True, data)) on success, or print((False, 'very detailed error message')) on failure. Make sure the final output is being assigned to 'result' variable.\n"
            "- Use the literal string of the task output (already included in your input) if needed.\n"
            "- Generate the code **following strictly** the task description.\n"
            "- Be valid Python 3 — executable as-is.\n"
            f"{security_instructions}\n"
            "Additional instructions (do not override the previous instructions):\n"
            f"{self.additional_instructions}"
        )

    def user_instructions(self, task_output: TaskOutput) -> str:
        """Generates user instructions for the LLM code generation.

        Args:
            task_output (TaskOutput): The output to be validated.

        Returns:
            str: Instructions for generating validation code.
        """
        return (
            "Based on the task description below, generate Python 3 code that validates the task output. \n"
            "Task description:\n"
            f"{self.description}\n"
            "Here is the raw output from the task: \n"
            f"'{task_output.raw}' \n"
            "Use this exact string literal inside your generated code (do not reference variables like task_output.raw).\n"
            "Now generate Python code that follows the instructions above."
        )

    def generate_code(self, task_output: TaskOutput) -> str:
        """Generates Python code for validating the task output.

        The generated code is also printed through ``Printer``.

        Args:
            task_output (TaskOutput): The output to be validated.

        Returns:
            str: Generated Python code for validation.

        Raises:
            ValueError: If no LLM is available.
        """
        if self.llm is None:
            raise ValueError("Provide a valid LLM to the GuardrailTask")

        response = self.llm.call(
            messages=[
                {
                    "role": "system",
                    "content": self.system_instructions,
                },
                {
                    "role": "user",
                    "content": self.user_instructions(task_output=task_output),
                },
            ]
        )

        printer = Printer()
        printer.print(
            content=f"The following code was generated for the guardrail task:\n{response}\n",
            color="cyan",
        )
        return response

    @classmethod
    def _parse_result(cls, result: Any) -> Tuple[bool, Any]:
        """Turn the raw interpreter output into a ``(bool, Any)`` guardrail tuple.

        Anything that cannot be interpreted as such a pair is reported as a
        failed validation instead of raising, so the caller always receives a
        well-formed guardrail result.
        """
        import ast

        if isinstance(result, str):
            if any(marker in result for marker in cls._EXECUTION_ERROR_MARKERS):
                return False, result
            try:
                result = ast.literal_eval(result.strip())
            except (ValueError, SyntaxError):
                return (
                    False,
                    f"Guardrail code did not output a valid Python literal: {result!r}",
                )

        if (
            isinstance(result, (tuple, list))
            and len(result) == 2
            and isinstance(result[0], bool)
        ):
            return result[0], result[1]

        return (
            False,
            f"Guardrail code must produce a (bool, data) pair, got: {result!r}",
        )

    def __call__(self, task_output: TaskOutput) -> Tuple[bool, Any]:
        """Executes the validation code on the task output.

        Args:
            task_output (TaskOutput): The output to be validated.

        Returns:
            Tuple[bool, Any]: A tuple containing:
                - bool: True if validation passed, False otherwise
                - Any: The validation result or error message
        """
        from crewai_tools import CodeInterpreterTool

        code = self.generate_code(task_output)
        result = CodeInterpreterTool(code=code, unsafe_mode=self.unsafe_mode).run()
        return self._parse_result(result)
|
||||
@@ -9,6 +9,10 @@ from .crew_events import (
|
||||
CrewTestCompletedEvent,
|
||||
CrewTestFailedEvent,
|
||||
)
|
||||
from .guardrail_task_events import (
|
||||
GuardrailTaskCompletedEvent,
|
||||
GuardrailTaskStartedEvent,
|
||||
)
|
||||
from .agent_events import (
|
||||
AgentExecutionStartedEvent,
|
||||
AgentExecutionCompletedEvent,
|
||||
|
||||
@@ -23,6 +23,10 @@ from .flow_events import (
|
||||
MethodExecutionFinishedEvent,
|
||||
MethodExecutionStartedEvent,
|
||||
)
|
||||
from .guardrail_task_events import (
|
||||
GuardrailTaskCompletedEvent,
|
||||
GuardrailTaskStartedEvent,
|
||||
)
|
||||
from .llm_events import (
|
||||
LLMCallCompletedEvent,
|
||||
LLMCallFailedEvent,
|
||||
@@ -68,4 +72,6 @@ EventTypes = Union[
|
||||
LLMCallCompletedEvent,
|
||||
LLMCallFailedEvent,
|
||||
LLMStreamChunkEvent,
|
||||
GuardrailTaskStartedEvent,
|
||||
GuardrailTaskCompletedEvent,
|
||||
]
|
||||
|
||||
28
src/crewai/utilities/events/guardrail_task_events.py
Normal file
28
src/crewai/utilities/events/guardrail_task_events.py
Normal file
@@ -0,0 +1,28 @@
|
||||
from typing import Any, Callable, Optional, Union
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from crewai.utilities.events.base_events import BaseEvent
|
||||
|
||||
|
||||
class GuardrailTaskStartedEvent(BaseEvent):
    """Event emitted when a guardrail task starts.

    Attributes:
        guardrail: The guardrail about to run: either a callable validator or
            a string describing the validation to perform.
        retry_count: The retry count reported by the emitter when the
            guardrail starts.
    """

    type: str = "guardrail_task_started"
    guardrail: Union[str, Callable]
    retry_count: int
|
||||
|
||||
|
||||
class GuardrailTaskCompletedEvent(BaseEvent):
    """Event emitted when a guardrail task completes.

    Attributes:
        success: Whether the guardrail reported success.
        result: The result value reported by the guardrail.
        error: The error message reported by the guardrail, if any.
        retry_count: The retry count reported by the emitter on completion.
    """

    type: str = "guardrail_task_completed"
    success: bool
    result: Any
    error: Optional[str] = None
    retry_count: int
|
||||
307
tests/cassettes/test_guardrail_emits_events.yaml
Normal file
307
tests/cassettes/test_guardrail_emits_events.yaml
Normal file
@@ -0,0 +1,307 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
|
||||
personal goal is: Test Goal\nTo give my best complete final answer to the task
|
||||
respond using the exact following format:\n\nThought: I now can give a great
|
||||
answer\nFinal Answer: Your final answer must be the great and the most complete
|
||||
as possible, it must be outcome described.\n\nI MUST use these formats, my job
|
||||
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
|
||||
is the expected criteria for your final answer: Output\nyou MUST return the
|
||||
actual complete content as the final answer, not a summary.\n\nBegin! This is
|
||||
VERY important to you, use the tools available and give your best Final Answer,
|
||||
your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"]}'
|
||||
headers:
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- gzip, deflate, zstd
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '807'
|
||||
content-type:
|
||||
- application/json
|
||||
cookie:
|
||||
- __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
|
||||
_cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
|
||||
host:
|
||||
- api.openai.com
|
||||
user-agent:
|
||||
- OpenAI/Python 1.68.2
|
||||
x-stainless-arch:
|
||||
- arm64
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- MacOS
|
||||
x-stainless-package-version:
|
||||
- 1.68.2
|
||||
x-stainless-raw-response:
|
||||
- 'true'
|
||||
x-stainless-read-timeout:
|
||||
- '600.0'
|
||||
x-stainless-retry-count:
|
||||
- '0'
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.11.12
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
body:
|
||||
string: !!binary |
|
||||
H4sIAAAAAAAAAwAAAP//jFfNjiPHDb7PUxA6LiRhZ3Z2xpnbOHCQWSNY2J4gQbLGgKpid9NTzWoX
|
||||
qyRrjQX2IXzJ6+2TBKzu1s9mDrlIrWYVi/zI7yvq9wuABfvFHSxch9n1Q1h9+z7/8MM/dv/8E3H5
|
||||
fqdvdLh5//3mXx+/+/av/XeLpe2Im1/I5XnX2sV+CJQ5ymh2iTCTeb28vX57dXt1c3VTDX30FGxb
|
||||
O+TVdVz1LLy6en11vXp9u7r8ZtrdRXakizv49wUAwO/10+IUT78t7uD1cn7Tkyq2tLg7LAJYpBjs
|
||||
zQJVWTNKXiyPRhclk9TQH0DiDhwKtLwlQGgtbEDRHSWAD/IXFgxwX3/fwb0CwiNphvuWJC/hATrc
|
||||
EmyIBDLqM3nYce5gSHHLnqUFhAkZgkQ6RFGC3GGGnigr5I6AfhvIZfLgEmdKjNDEBLHkoeQ1PHaU
|
||||
qImJlmBPwOOmpoQAUyb1zFgyaOl7TPwRrQ5L8JSRA3lAhUS/Fk7k1x/kgzxI9ZEIQw+xgUyaWdol
|
||||
DJgyuxIwhT1obPIOEx3NSltKGGqg7Owh9kMUkqz2SMCSIwwB9zVuaIp47EkyBsj7gfTkMGBxoXiC
|
||||
Tclgp0jMELhnQyLHO4vzcg2vXv1dOFfQWdpXr+7gsWOdUWDhzBhg6FAJdiNA4nnLvpyHFxP00Zcw
|
||||
xdDRMb05RfLAAqwxVPzWUE82i0JHYQASLWkqH6HrKlyzuyHFNmFvOTvbboikRC6HPUQBthh2soRd
|
||||
x64DZK92msPsOkNjU1oFqrjzWB1PWwpxMPjMuSPVWrwrA+VBMrWpxnmKzX2TKUGZ465F45OlM/ZN
|
||||
DCHudD2COaLXRFfU4BHYUuJmbwtHkDMlrEnBhvLOmn2LiWPRrxDWvWbqFVD8CNboArP52cMupmfI
|
||||
saXcUbKmNM/ip6Z8Y3n9VD2cpvReHB3CsDyMKMdTT/hXS7icgjhps20MW1LYYmCPec7qQEsL1ti0
|
||||
Pz3h0BxDir64PCE1doCOOfEpiXUgxw2Tn5nW1/DMuaWtlu6B6CyAlX2rXUzBsNpyimJbKhTXBsW9
|
||||
czRktPRf6v79YAwM+1ENqkyNdTyhmJ3OCkVLXekwJYvQpGKzBxK/Kkqpls4FtoBH3rYRQz0kzqiN
|
||||
JSiKGw6c9xNoY6djfTOzakQfW2TRDJuiLKR6hsoaTnKbY3VRGk69HvrlWAO27ej3VRc9DSHuKyss
|
||||
iGMFLJUvn/+jIER+pMpjhAadRTxloMfzJkqRAjUNucxbCvvlobFzjEGrNIQqei35io15qCa7NCYJ
|
||||
q1q1spKVHPvaP1PF4NHW1rrVTYGfCX6iQMKlr+nsaAM4DIEdjqoRE7yrwmPWd7jFczMaccdeT7VA
|
||||
OJ0JiQbKbHkccrQrSWfJYbE7eUqYHZO4sY6JfHE0gjhEVZ5qHBvoSo8ClFJM6znJev/9DQXbWs2v
|
||||
U9Ri8qbw7uHHe0vGlv+IHCqWBrhUxbbsckL3XOM0l1VASXUJvTk/vHcW83KKdIgpzxb7jiXbxaOH
|
||||
6P4cJbMUK+GZRr5Qh3ckzyyTthslRyRngZgaxGBIRWQ+tTL5VPjPdJoHCiy0/Er+UPbgOpSWFHr0
|
||||
NGu8i542RloDxx1CN1ZXNZtvaxet07Te6rmLKZa2O1a5G3/PrasZW9Ivn/+we+BM/2dxHNHEAwm/
|
||||
fP7DxX7DMs8vlX7ZpLBlZ8yyZKvEYeCP9YKOQZcQcD+NI9Gu+gr1SNLAdolICx233erXMorETOk1
|
||||
PORzMZ0QN62tk0CUsH/xLq3jQtA4Md9YPwnryBAbYrZzDXc4ioYWZ7VsSgA22a8zyUQo8aCYWat4
|
||||
zzJ4Oi8maoqizaxSQjgxoEic3Nik+vNk+XSYTUNshxQ3+tXWRcPC2j0ZHaPYHKo5Dotq/XQB8HOd
|
||||
gcvZWLsYUuyH/JTjM9XjLt++Gf0tjqP30Xr99nay5pgxHA03l9PofO7waZwV9WSMXjh0Hfnj1uPM
|
||||
jcVzPDFcnKT9v+G85HtMnaX9f9wfDbVjyT8NiTy785SPyxL9Uu/Zl5cdYK4BL5TSlh09ZaZkpfDU
|
||||
YAnjH4bFSJenhqWlNCQe/zU0w1Nz69/e4Dd45RYXny7+CwAA//8DAOzQwR9DDQAA
|
||||
headers:
|
||||
CF-RAY:
|
||||
- 93402298d9980110-GRU
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Encoding:
|
||||
- gzip
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Mon, 21 Apr 2025 21:57:12 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- nosniff
|
||||
access-control-expose-headers:
|
||||
- X-Request-ID
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
cf-cache-status:
|
||||
- DYNAMIC
|
||||
openai-organization:
|
||||
- crewai-iuxna1
|
||||
openai-processing-ms:
|
||||
- '6385'
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
strict-transport-security:
|
||||
- max-age=31536000; includeSubDomains; preload
|
||||
x-ratelimit-limit-requests:
|
||||
- '30000'
|
||||
x-ratelimit-limit-tokens:
|
||||
- '150000000'
|
||||
x-ratelimit-remaining-requests:
|
||||
- '29999'
|
||||
x-ratelimit-remaining-tokens:
|
||||
- '149999832'
|
||||
x-ratelimit-reset-requests:
|
||||
- 2ms
|
||||
x-ratelimit-reset-tokens:
|
||||
- 0s
|
||||
x-request-id:
|
||||
- req_2a19c29e1e9dd766289937937418044a
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
- request:
|
||||
body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
|
||||
personal goal is: Test Goal\nTo give my best complete final answer to the task
|
||||
respond using the exact following format:\n\nThought: I now can give a great
|
||||
answer\nFinal Answer: Your final answer must be the great and the most complete
|
||||
as possible, it must be outcome described.\n\nI MUST use these formats, my job
|
||||
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
|
||||
is the expected criteria for your final answer: Output\nyou MUST return the
|
||||
actual complete content as the final answer, not a summary.\n\nThis is the context
|
||||
you''re working with:\n### Previous attempt failed validation: bad result\n\n\n###
|
||||
Previous result:\nAs a Test Agent, I have been tasked with providing a complete
|
||||
response that meets the expected criteria for output. Therefore, here is the
|
||||
full content without summarization, detailed as required.\n\nIn the realm of
|
||||
testing, particularly software testing, several critical components come into
|
||||
play. The fundamental types of testing include but are not limited to:\n\n1.
|
||||
**Unit Testing**: This is the initial phase where individual components or modules
|
||||
of the software are tested in isolation. Unit tests help ensure that each part
|
||||
of the program functions correctly on its own, which aids in catching bugs early
|
||||
in the development process.\n\n2. **Integration Testing**: After unit testing,
|
||||
integration testing follows. This phase focuses on verifying the interaction
|
||||
between various components or systems and ensuring that they work together as
|
||||
intended.\n\n3. **System Testing**: Once the integrated components have been
|
||||
tested, system testing involves validating the complete and fully integrated
|
||||
software product. This ensures that it meets the specified requirements and
|
||||
works as expected in a real-world environment.\n\n4. **Acceptance Testing**:
|
||||
This is typically the final phase of testing and is usually carried out by end-users
|
||||
or clients. The goal is to validate the usability and functionality of the system
|
||||
against business requirements. Acceptance testing confirms that the software
|
||||
is ready for deployment and meets the user\u2019s needs.\n\nTo facilitate these
|
||||
testing processes effectively, various tools are leveraged. These tools can
|
||||
include:\n\n- **Automated Testing Tools**: Tools like Selenium for web applications
|
||||
or JUnit for Java applications allow testers to automate repetitive testing
|
||||
tasks, which increases efficiency and reduces the possibility of human error.\n\n-
|
||||
**Test Management Tools**: Tools such as JIRA or TestRail are essential for
|
||||
tracking test progress, managing test cases, and reporting testing outcomes.\n\n-
|
||||
**Continuous Integration Tools**: Tools like Jenkins help in automating the
|
||||
process of running tests as part of the development pipeline, ensuring that
|
||||
any changes made in the codebase are continuously tested.\n\nIn conclusion,
|
||||
thorough testing through various stages\u2014unit, integration, system, and
|
||||
acceptance\u2014combined with the strategic use of specialized tools, lays the
|
||||
foundation for delivering high-quality software. It ensures that the product
|
||||
not only functions correctly but also meets user expectations, paving the way
|
||||
for successful implementations and satisfied clients.\n\n\nTry again, making
|
||||
sure to address the validation error.\n\nBegin! This is VERY important to you,
|
||||
use the tools available and give your best Final Answer, your job depends on
|
||||
it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"]}'
|
||||
headers:
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- gzip, deflate, zstd
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '3539'
|
||||
content-type:
|
||||
- application/json
|
||||
cookie:
|
||||
- __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
|
||||
_cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
|
||||
host:
|
||||
- api.openai.com
|
||||
user-agent:
|
||||
- OpenAI/Python 1.68.2
|
||||
x-stainless-arch:
|
||||
- arm64
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- MacOS
|
||||
x-stainless-package-version:
|
||||
- 1.68.2
|
||||
x-stainless-raw-response:
|
||||
- 'true'
|
||||
x-stainless-read-timeout:
|
||||
- '600.0'
|
||||
x-stainless-retry-count:
|
||||
- '0'
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.11.12
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
body:
|
||||
string: !!binary |
|
||||
H4sIAAAAAAAAA3RXTY/cuBG9768ojA9ZG90Ne3YSB3ObDJxgjMTe2G0ESHxhkyWpdiiWzI9uy/vn
|
||||
gypKavXYexn0SGSx+F69V6XffwK4Ind1C1e2M9n2g9/+7X3+93+af7rxtw+PN2OPx5tCzc2bb+Px
|
||||
v+/91UZ28OE3tHnetbPcDx4zcaivbUSTUaK+en3z5+vX13/55Vpf9OzQy7Z2yNsb3vYUaHv98vpm
|
||||
+/L19tVfp90dk8V0dQv/+wkA4Hf9K3kGh1+vbuHlZn7SY0qmxavbZRHAVWQvT65MSpSyCflqc35p
|
||||
OWQMmvoDBD6BNQFaOiIYaCVtMCGdMAJ8Dn+nYDzc6f+38BAgdwgRje+BG0jc5JOJCBlTptBuwIAA
|
||||
EbHDkDTiMEQ2tgNKgClhyGQ8ZAYMqcjGTo4bBk/WCHgJesQMX4rxlEeQ3J2JLoEJDpriG/IeSsII
|
||||
+HVAm+umHew7hIa95xOFFrhkTwGTZtuU4EyPIRuvyXHAkJOkL2+fXgE8NWhH63ED2A+dSfRNHh9N
|
||||
JC4JPB7R193TBslMIq2vxz5ByeTpG8rLyKXtuGRdN0S2mNLuc/gcnj17BvtxQA34cU5lXyPLglc7
|
||||
ePHiU6A8P3zx4hb2HSXI44DrNLKJLeYEFBwdyRXj5ea2gsoReswduwQnyh1VHi07PJiET/lAIazI
|
||||
oQPGhmOfwKQJcHQ7eMhCp2JLMWUQrCUVhw2GhGBaQyFlOJQ2bQCDOXjJ0Al0PGBMcqA12XaAMXJM
|
||||
cMCG9fQVIUPkNgqoutybjFHqocW0A0VEbp5AuRsHssb7EUzJ3IvwoCQ5s4mmxxPHxwSeHhHe6s6G
|
||||
I7w1RyO4vFue7N692W/AzEX0pZB9hCNGaqbiBFeiQt3hfBmpqwtGr4Wwh5CxjXXPire7Ru5Q5txV
|
||||
MJT/lMDGYidd6HkjdHwCR02DUQ7o2RWPwm3GaGxWEoEDggmcO4w7WB+5VKaAJ/idOpRVUv4HCujW
|
||||
OhBwIHM7r4gRbfbjBshJOTejRNKDG2NROba56pFSKpqVIjInJxkcMJ8Qgz6TrISPQFmFSglEVqi2
|
||||
Q+HI/ohw4NxBb4KUrYQ+87jUN9ou0Bc5sFJbVaas/sop9yYoi3e/Pix7OCr87/5xZnztNUrYL0LY
|
||||
xzFl7L/TWM1zyjGdU1HtqN+jW0cUmRg4dexxB1PMeRMejS8mT6aEwW0zbzE4SHVdGtAulaaySXO1
|
||||
mXypjAN2RvJZiRKMjZzSYlPJYpCflSjLwdHklA8ZMEj2Wh2LR0y4Bw7b1aNzmQbri5N0JkswweIG
|
||||
SjIHEqfe6O6EtkTx7QUphraYaEJGPF9E6irifHFxBMUl9cy586MUFOVa7cGhAwxHihxEa0rZjVB2
|
||||
Zy0OWdJY03bPwRUF5OwJh1Hhlq6hTpiyecSOvcOYNmDOceasHWaMvTaQWTmKf81XGlQlcbXVRsoY
|
||||
yUy6kBbpRq06h4PnUXOvRdXQCtm5xBIMdGRpUbKnmsCPyZ+aYIKDyACTnPWlUMRe5Szna38MiC7t
|
||||
4J77Xgpz6sMqV6ES4ZOs+h5F+PnT3f65xnmvzNRK+NHC93f750sbe7O0v/n9XgQqr/cM2DRkCUP2
|
||||
o8jctNXsFxSqg6LwoTWMedQBQ0RhpjaqehcIsBdI0d1K8K3UwuIWF2er59ZXIk46G62AjKEzwc56
|
||||
TgOKioKDiJ6mql71110NCanYToT3ET0GKr2GOuHhcoiZ7QsiDpgpyyik7WoDp448rs3rbhgkjrjh
|
||||
AYU7pzF7PshCMwxnFSZqg3qE4kj9EPmoFygxmMhFJhHqsVaBsbZEY8fdjJJgA/9S8LVtLRjtz8m8
|
||||
ffhwN9vmB0Ne8ZaRIg0Ykjl41OQ4tibUwWjxt6bhWB0eE8LgTa6jQ47GPuqypaVvlhqQp1YU4NgW
|
||||
HdIEwOomjbHCgsBo2Xtz4Km/mZ71YCNi7A8q48UurTdqQQqBp1Zt4+kM9qTuFojuOWQKRRz0oofP
|
||||
SP3B+5/vH55XSjdLgbzF8Eh19LqnaD3eP2zOdbFOghudxMB2JrQTeSuN0OqgKV+xtFhCmPFPdWQT
|
||||
7msoKXXue8o6r9Vepr3ZInToh3Q5f4cRAp7UeXNkV6yOCE5YwQSBMxwimkfAr1RTPrcI9f6UI5re
|
||||
U/jRdCQDRuP5tDjFPYsDJeKg5jA10/M3wx+P5BedSwaKzRqdzeTQm7n8n/j6pqKUMKpoDDi9jc0w
|
||||
Yq7ji+UQaj8dShxYmz849CSOHFroqO22y8fJlKeYbBk8ujqWqZtkaWkt2YvpgJsfzTW1bFQJ/Vmc
|
||||
iX2ZJgG5zf3DslCH/461ns/fV5XVeWabjMxOv0cYOE/uPA1t08S97k6ffCbJTYY/bGTMm21L9a0F
|
||||
k37QkaQ+OIixa2dcjQ8XnelQMuBXK23p+y+4Oj6UYRAPEQ1JmTfFT9NBDcclW+5xniIdeWpX8zd4
|
||||
M9bG3KoX6mQrblUd3V9+W7hi/w8AAP//jJhBbsQgDEX3OUXEASp12s70MiPkgkndMoDALHP3CsIE
|
||||
0s6i6weOfyKI/+emZkFXR5BZ5cT+VnwGMCXThtnS2v6zba09jYY6oskJiql32doBgHO+CSxW/trI
|
||||
upt365cQ/Uf6tVUYcpQ+ZURI3hWjntgHUek6zfO1hgT54PtFiP4WWLL/xvq480sLCUTPJgb6eqdc
|
||||
xo4Onk+XOzlUlBoZyKYhaBCqTBS67+2pBGRNfgDToPtvP49qb9rJLf8p30E99ahliKhJHTX3ZRG/
|
||||
6jF/vGx/z7VhUS8MhZIJY/kWGg1ku0UqYrtypCG3YAyRtlzFBGku+u0M73BSYlqnHwAAAP//AwDv
|
||||
DD9WZRIAAA==
|
||||
headers:
|
||||
CF-RAY:
|
||||
- 934022c27c860110-GRU
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Encoding:
|
||||
- gzip
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Mon, 21 Apr 2025 21:57:22 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- nosniff
|
||||
access-control-expose-headers:
|
||||
- X-Request-ID
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
cf-cache-status:
|
||||
- DYNAMIC
|
||||
openai-organization:
|
||||
- crewai-iuxna1
|
||||
openai-processing-ms:
|
||||
- '9187'
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
strict-transport-security:
|
||||
- max-age=31536000; includeSubDomains; preload
|
||||
x-ratelimit-limit-requests:
|
||||
- '30000'
|
||||
x-ratelimit-limit-tokens:
|
||||
- '150000000'
|
||||
x-ratelimit-remaining-requests:
|
||||
- '29999'
|
||||
x-ratelimit-remaining-tokens:
|
||||
- '149999158'
|
||||
x-ratelimit-reset-requests:
|
||||
- 2ms
|
||||
x-ratelimit-reset-tokens:
|
||||
- 0s
|
||||
x-request-id:
|
||||
- req_94bb40dead4c4e9c7fa12de3bfb636b7
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
version: 1
|
||||
522
tests/cassettes/test_guardrail_using_llm.yaml
Normal file
522
tests/cassettes/test_guardrail_using_llm.yaml
Normal file
@@ -0,0 +1,522 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
|
||||
personal goal is: Test Goal\nTo give my best complete final answer to the task
|
||||
respond using the exact following format:\n\nThought: I now can give a great
|
||||
answer\nFinal Answer: Your final answer must be the great and the most complete
|
||||
as possible, it must be outcome described.\n\nI MUST use these formats, my job
|
||||
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
|
||||
is the expected criteria for your final answer: Output\nyou MUST return the
|
||||
actual complete content as the final answer, not a summary.\n\nBegin! This is
|
||||
VERY important to you, use the tools available and give your best Final Answer,
|
||||
your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"]}'
|
||||
headers:
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- gzip, deflate, zstd
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '807'
|
||||
content-type:
|
||||
- application/json
|
||||
host:
|
||||
- api.openai.com
|
||||
user-agent:
|
||||
- OpenAI/Python 1.68.2
|
||||
x-stainless-arch:
|
||||
- arm64
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- MacOS
|
||||
x-stainless-package-version:
|
||||
- 1.68.2
|
||||
x-stainless-raw-response:
|
||||
- 'true'
|
||||
x-stainless-read-timeout:
|
||||
- '600.0'
|
||||
x-stainless-retry-count:
|
||||
- '0'
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.11.12
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
body:
|
||||
string: !!binary |
|
||||
H4sIAAAAAAAAAwAAAP//jFTBbuQ2DL3PVxA6e4KJm+zszq0ttkBORYuiPbSLASPRNndlSRHpmcwu
|
||||
8u+FZCcz2ebQi2H78ZGPj6K+rQAMO7MDYwdUOya//ulX/a3tf/+Z3f3jX/xxdGGDfz60p7uvx+mj
|
||||
aQoj3n8mq8+sKxvH5Ek5hhm2mVCpZL3e3ty22/bdpq3AGB35QuuTrm/ieuTA63bT3qw32/X1+4U9
|
||||
RLYkZgd/rwAAvtVn0RkcPZodbJrnPyOJYE9m9xIEYHL05Y9BERbFoKY5gzYGpVCl30GIR7AYoOcD
|
||||
AUJfZAMGOVIG+Cf8wgE9/Fi/d/DHQKAoXwAVBgwOMj1MnEkAoRiQaaAgNVNw4EiRPZUoSTEIgQ6o
|
||||
MBKpgA4E9JjIKjmwmZUyI3QxQ5w0TXoFdwG6yXfsPYcedGB5LjdS0AZYgQVIhIIyetAINh4oA3oP
|
||||
mTwdMCiglBpSBaUcD+wIllmVl+pEAxRkynMZ1KqNQxfziGWgpYz1hLkpBMtCTU2H1k4ZlfwJ0Llc
|
||||
lMx9BSInELv6Ufy6Ks5l6mKmBu7gyN6XNj0HWgi2TDGfqrQYKKg0gK6QSl8YTtBP7Kgw5LkPN8uo
|
||||
2umsvKsjm10s0nWIOU79UIOP5P06ZSqmlQRH1iFOCnFk1WIAjynmcmKW8QnEDDKNI2b+Ojv0ypzS
|
||||
GgtgSjmiHebe+gkzBqVyogbuh/XDhJ719N1JEFSWjhfXFkPccixqdrm6PLiZukmwLE+YvL8AMIS4
|
||||
EMrKfFqQp5cl8bFPOd7Ld1TTcWAZ9plQYigLIRqTqejTCuBTXcbp1X6ZlOOYdK/xC9Vy17c/zPnM
|
||||
+Q64QK8/LKhGRX8G2m3bvJFwv9h+sc/Goh3Inann5cfJcbwAVhdt/1fOW7nn1jn0/yf9GbCWkpLb
|
||||
p0yO7euWz2GZPtcFfzvsxeYq2AjlA1vaK1Muo3DU4eTnm8vISZTGfcehp5wyz9dXl/bd1t2+w/fY
|
||||
WrN6Wv0LAAD//wMAAfXtOswFAAA=
|
||||
headers:
|
||||
CF-RAY:
|
||||
- 934022059c2c0110-GRU
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Encoding:
|
||||
- gzip
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Mon, 21 Apr 2025 21:56:45 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Set-Cookie:
|
||||
- __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
|
||||
path=/; expires=Mon, 21-Apr-25 22:26:45 GMT; domain=.api.openai.com; HttpOnly;
|
||||
Secure; SameSite=None
|
||||
- _cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000;
|
||||
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- nosniff
|
||||
access-control-expose-headers:
|
||||
- X-Request-ID
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
cf-cache-status:
|
||||
- DYNAMIC
|
||||
openai-organization:
|
||||
- crewai-iuxna1
|
||||
openai-processing-ms:
|
||||
- '2377'
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
strict-transport-security:
|
||||
- max-age=31536000; includeSubDomains; preload
|
||||
x-ratelimit-limit-requests:
|
||||
- '30000'
|
||||
x-ratelimit-limit-tokens:
|
||||
- '150000000'
|
||||
x-ratelimit-remaining-requests:
|
||||
- '29999'
|
||||
x-ratelimit-remaining-tokens:
|
||||
- '149999832'
|
||||
x-ratelimit-reset-requests:
|
||||
- 2ms
|
||||
x-ratelimit-reset-tokens:
|
||||
- 0s
|
||||
x-request-id:
|
||||
- req_f39581c88a83855cf77c06098b787948
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
- request:
|
||||
body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
|
||||
personal goal is: Test Goal\nTo give my best complete final answer to the task
|
||||
respond using the exact following format:\n\nThought: I now can give a great
|
||||
answer\nFinal Answer: Your final answer must be the great and the most complete
|
||||
as possible, it must be outcome described.\n\nI MUST use these formats, my job
|
||||
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
|
||||
is the expected criteria for your final answer: Output\nyou MUST return the
|
||||
actual complete content as the final answer, not a summary.\n\nThis is the context
|
||||
you''re working with:\n### Previous attempt failed validation: bad result\n\n\n###
|
||||
Previous result:\nThe task at hand requires a comprehensive and detailed response
|
||||
that meets the expected criteria for output. In fulfilling this requirement,
|
||||
it is essential to cover all relevant aspects and provide complete content,
|
||||
ensuring that the information is clear, concise, and accurately addresses the
|
||||
needs of the task. Therefore, I will outline the necessary components, adhere
|
||||
to any guidelines provided, and ensure that the final output is thorough and
|
||||
well-presented, without omitting important details or summarizing the information.
|
||||
This approach will guarantee a high-quality response that satisfies the outlined
|
||||
expectations.\n\n\nTry again, making sure to address the validation error.\n\nBegin!
|
||||
This is VERY important to you, use the tools available and give your best Final
|
||||
Answer, your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop":
|
||||
["\nObservation:"]}'
|
||||
headers:
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- gzip, deflate, zstd
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '1619'
|
||||
content-type:
|
||||
- application/json
|
||||
cookie:
|
||||
- __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
|
||||
_cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
|
||||
host:
|
||||
- api.openai.com
|
||||
user-agent:
|
||||
- OpenAI/Python 1.68.2
|
||||
x-stainless-arch:
|
||||
- arm64
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- MacOS
|
||||
x-stainless-package-version:
|
||||
- 1.68.2
|
||||
x-stainless-raw-response:
|
||||
- 'true'
|
||||
x-stainless-read-timeout:
|
||||
- '600.0'
|
||||
x-stainless-retry-count:
|
||||
- '0'
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.11.12
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
body:
|
||||
string: !!binary |
|
||||
H4sIAAAAAAAAA4xWTW8cNwy9+1cQczTWi83GjhPf3CQFjKJI27hoizowuBJnhrZGUkVq15sg/72Q
|
||||
ZvbDaQ69GOshRb1HPpL6cgLQsG2uoDE9qhmiO/vhg/56YR5uPy4uzfvFm5//ePfnb8vPf70y4Z+f
|
||||
PjSzciKsHsjo7tTchCE6Ug5+NJtEqFSivrg8v1heLl8tLqphCJZcOdZFPTsPZwN7Plsuludni8uz
|
||||
F6+n031gQ9Jcwd8nAABf6t+C01t6aq5gMdt9GUgEO2qu9k4ATQqufGlQhEXRazM7GE3wSr5CvwEf
|
||||
NmDQQ8drAoSuwAb0sqEEcOd/ZI8Oruv/V3DbEyjKI6BCj96CJ0MirKgkgKB9SCF3PRSbJUV2ZCGR
|
||||
xOCFQAOQl5wIarYYvSHYsPagPQE9RTJKFkxipcQIbUgQssas83p1TDxg2sKY+gKYpQSNKazZEvig
|
||||
8JBFAUHyUF1XWWvwqTzlRyUPiVypTzmuE6sZoLWp8PEdoHPFh9boFVAKNKm00PaUiocGQL/dXW6h
|
||||
y2zJsSeZw52/87cB0PRMawLtWWb1njY4FzbleEEUPHkV2LBzsCJgb1wuodhX513irmq8F3M4Pb3x
|
||||
moLNpgjt9PQK3jrC5LZgqWVP9VTMKQYhCO2eWQUesrqdj3DnuWVTKxBaKMDXIy2syBL15KVqotZ+
|
||||
fueh6GFZQLzbVfbao9sKS0Hy/gmHEp7Q9FPC9hBCZLMXh9vOCtOQYkio5coWjcoMLCrOKtR94g0K
|
||||
gWi2TAIhAT1hqWOtuuQYQ9JaKSkKHEoy5/C+AvCdIxiKGFZVWi6ksdo7sSC02TnI3lIqLWILkgmw
|
||||
5KowGFC1cr/zLwvxtw4T67Zi/CWRYZnK8LsQmFKKaorVRODQdxm7kvLKPExiPG4IE/yatsC+DWnA
|
||||
Ulagth3l7bZzuF4HtoDDiru8u3pqIu1Rx3THwF5LM2BSNnlUNntA2OB29GMBQtmOV071tZXZeWUW
|
||||
ivZ2dD7W7uHPo1geaQst1wSNLcBeuOtVwCbceGhTGKrjvuPHfI9sjjUwg0SVqakN1BPwUGq4k2H9
|
||||
cpSJmEjIFzL1WpWdMgwdN26lcVFoXFvL5SQ6+I0k5GSoivOmBYzRscGVo9mu0SBRS4m8GcXl2D9W
|
||||
ZbU5aU8JEmFVxYB1HjkZU1nG5V5FhwvrZHnSEmlKELCfUI5CErAsJovQmPnbURZ1PIBoykZzUSnG
|
||||
mAKafgY342zoMib0SkW1B/EULAORSr1iam87zdGaQIFNz44mtGN3Jy6Bd6Ua+60PjkXZwJpp83xw
|
||||
jIN+Djc6QimNXSeHc1vAKs6YaM0hC0TWFp0TkFxnAKyxy0dtMGrn21FcB2Oi1XbU9YhSUFnKXAip
|
||||
ig/dbhMcb7JEbRYs29Rn544M6H2YUlB26KfJ8nW/NV3oYgor+eZo07Jn6e8ToQRfNqRoiE21fj0B
|
||||
+FS3c362cJuYwhD1XsMj1euWb5ZjvObwKDhYXy5eT1YNiu5geLWYlvrzgPdjqeRowTcGTU/2cPTw
|
||||
GsBsORwZTo5o/xfO92KP1Nl3/yf8wWAMRSV7HxNZNs8pH9wSPdQd/323fZor4EYordnQvTKlUgpL
|
||||
LWY3PmUa2YrScN+y7yjFxON7po337aW9eIWvcWmak68n/wIAAP//AwA0/RJL3QkAAA==
|
||||
headers:
|
||||
CF-RAY:
|
||||
- 93402216690b0110-GRU
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Encoding:
|
||||
- gzip
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Mon, 21 Apr 2025 21:56:49 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- nosniff
|
||||
access-control-expose-headers:
|
||||
- X-Request-ID
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
cf-cache-status:
|
||||
- DYNAMIC
|
||||
openai-organization:
|
||||
- crewai-iuxna1
|
||||
openai-processing-ms:
|
||||
- '4451'
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
strict-transport-security:
|
||||
- max-age=31536000; includeSubDomains; preload
|
||||
x-ratelimit-limit-requests:
|
||||
- '30000'
|
||||
x-ratelimit-limit-tokens:
|
||||
- '150000000'
|
||||
x-ratelimit-remaining-requests:
|
||||
- '29999'
|
||||
x-ratelimit-remaining-tokens:
|
||||
- '149999631'
|
||||
x-ratelimit-reset-requests:
|
||||
- 2ms
|
||||
x-ratelimit-reset-tokens:
|
||||
- 0s
|
||||
x-request-id:
|
||||
- req_8383a16d5f5b7f53d659bebf481ba936
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
- request:
|
||||
body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
|
||||
personal goal is: Test Goal\nTo give my best complete final answer to the task
|
||||
respond using the exact following format:\n\nThought: I now can give a great
|
||||
answer\nFinal Answer: Your final answer must be the great and the most complete
|
||||
as possible, it must be outcome described.\n\nI MUST use these formats, my job
|
||||
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
|
||||
is the expected criteria for your final answer: Output\nyou MUST return the
|
||||
actual complete content as the final answer, not a summary.\n\nBegin! This is
|
||||
VERY important to you, use the tools available and give your best Final Answer,
|
||||
your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"]}'
|
||||
headers:
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- gzip, deflate, zstd
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '807'
|
||||
content-type:
|
||||
- application/json
|
||||
cookie:
|
||||
- __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
|
||||
_cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
|
||||
host:
|
||||
- api.openai.com
|
||||
user-agent:
|
||||
- OpenAI/Python 1.68.2
|
||||
x-stainless-arch:
|
||||
- arm64
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- MacOS
|
||||
x-stainless-package-version:
|
||||
- 1.68.2
|
||||
x-stainless-raw-response:
|
||||
- 'true'
|
||||
x-stainless-read-timeout:
|
||||
- '600.0'
|
||||
x-stainless-retry-count:
|
||||
- '0'
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.11.12
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
body:
|
||||
string: !!binary |
|
||||
H4sIAAAAAAAAA4xU227bRhB911cM+CwJtnyN3xygBoI+xG1doGgTCKPdITnRcpbdGUoRAgP9jf5e
|
||||
v6TYpWTJjR8KEATIs3PmdvZ8mwBU7Ks7qFyL5ro+zN5/tJ/urz7cPjziw68ff3y6vXz8vb1/fLyw
|
||||
n29/q6Y5Iq6+kLND1NzFrg9kHGWEXSI0yqznN5dXi5vF9flZAbroKeSwprfZZZx1LDxbnC0uZ2c3
|
||||
s/PbfXQb2ZFWd/DHBADgW3nnOsXT1+oOClf505EqNlTdvRwCqFIM+U+FqqyGYtX0CLooRlJK/wAS
|
||||
t+BQoOENAUKTywYU3VIC+CQPLBjgvnzfwROpgaGuFTAReFJuhDxYBNpgGNAItCfHNTvQNYegEBOs
|
||||
JW4D+YaABRBy/hRDIA8kG05ROhKbw1NL4BIbJUaoYzpwsjSgg3OkCiwuDJ7ABUxsO4g1sKilweXR
|
||||
6xQwcFP4YMvWQiBMkgnGdfGG8hnxYC1BoA2FTEHSYEMlylraAQV2bFCn2EGPydhxj2I6h1/23WEI
|
||||
uykg2GEioG0cgn8pEMHl3CWXi+JYCfoUuz6nQIM4WGAhhW3+YgX62pMz8mPWXN7rzPfec+5xzMx2
|
||||
SNhiWdyWQph5qjnvQ11Muek0rBK7MSGqUn4K9Z8Dhv34EmkfJQMrVPIQBfpEnoxSV8gOK5nD+x2Q
|
||||
6FCoC2emOk6AFRIF2qDY2HaLIZA0LE0peMshANX1uIewg45Qh0TfNfvPX3+Dwx5XHNiYtLD1KW7Y
|
||||
ExRNrELWknLTWtaExczBCQbxlLLgfdl5Xah1KLuHDs0ozeGHQwm50R4T5rFOSyM5qiNro9/LpCby
|
||||
K3Rr6Mi1KKzdKP08SjHGAGPuF4HGGlB2x6lMYdtyIMCgETpkMeQiSJRT9WeN+MHloiweVZtLaFLc
|
||||
Wjs/vcGJ6kExu4gMIZwAKBKt9FO84/MeeX5xixCbPsWV/ie0qllY22Ui1CjZGdRiXxX0eQLwubjS
|
||||
8MpoqlHOS4trKunOry5Gvupohqfo9R61aBiOwMXZu+kbhEtPhhz0xNgqh64lfww9uiAOnuMJMDlp
|
||||
+/ty3uIeW2dp/g/9EXCOeiO/zHeG3euWj8cSfSmX++1jL2MuBVdKacOOlsaU8io81TiE0cIr3alR
|
||||
t6xZGkp94tHH635Z3/ira7zFhasmz5N/AQAA//8DAAhvMU7VBgAA
|
||||
headers:
|
||||
CF-RAY:
|
||||
- 93402233baf00110-GRU
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Encoding:
|
||||
- gzip
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Mon, 21 Apr 2025 21:56:56 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- nosniff
|
||||
access-control-expose-headers:
|
||||
- X-Request-ID
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
cf-cache-status:
|
||||
- DYNAMIC
|
||||
openai-organization:
|
||||
- crewai-iuxna1
|
||||
openai-processing-ms:
|
||||
- '6058'
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
strict-transport-security:
|
||||
- max-age=31536000; includeSubDomains; preload
|
||||
x-ratelimit-limit-requests:
|
||||
- '30000'
|
||||
x-ratelimit-limit-tokens:
|
||||
- '150000000'
|
||||
x-ratelimit-remaining-requests:
|
||||
- '29999'
|
||||
x-ratelimit-remaining-tokens:
|
||||
- '149999832'
|
||||
x-ratelimit-reset-requests:
|
||||
- 2ms
|
||||
x-ratelimit-reset-tokens:
|
||||
- 0s
|
||||
x-request-id:
|
||||
- req_f5273114a4a797fd0928674edb442194
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
- request:
|
||||
body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
|
||||
personal goal is: Test Goal\nTo give my best complete final answer to the task
|
||||
respond using the exact following format:\n\nThought: I now can give a great
|
||||
answer\nFinal Answer: Your final answer must be the great and the most complete
|
||||
as possible, it must be outcome described.\n\nI MUST use these formats, my job
|
||||
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
|
||||
is the expected criteria for your final answer: Output\nyou MUST return the
|
||||
actual complete content as the final answer, not a summary.\n\nThis is the context
|
||||
you''re working with:\n### Previous attempt failed validation: bad result\n\n\n###
|
||||
Previous result:\nTest tasks are designed to evaluate specific skills or knowledge
|
||||
in a controlled environment. The criteria for evaluating success include clarity
|
||||
of instructions, alignment with learning objectives, and the level of engagement
|
||||
they elicit from participants. Specifically, a test task should include a clear
|
||||
and concise prompt that outlines what is expected from the participants. Additionally,
|
||||
it should have a well-defined scoring rubric that assesses the quality of responses
|
||||
based on predetermined criteria. By ensuring that the test task is relevant
|
||||
and challenging, it will effectively measure the participants\u2019 capabilities
|
||||
and provide valuable insights into their understanding of the subject matter.
|
||||
Effective preparation, testing methods, and feedback mechanisms are essential
|
||||
to the success of any test task, while also maintaining an environment conducive
|
||||
to learning and growth.\n\n\nTry again, making sure to address the validation
|
||||
error.\n\nBegin! This is VERY important to you, use the tools available and
|
||||
give your best Final Answer, your job depends on it!\n\nThought:"}], "model":
|
||||
"gpt-4o-mini", "stop": ["\nObservation:"]}'
|
||||
headers:
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- gzip, deflate, zstd
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '1887'
|
||||
content-type:
|
||||
- application/json
|
||||
cookie:
|
||||
- __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
|
||||
_cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
|
||||
host:
|
||||
- api.openai.com
|
||||
user-agent:
|
||||
- OpenAI/Python 1.68.2
|
||||
x-stainless-arch:
|
||||
- arm64
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- MacOS
|
||||
x-stainless-package-version:
|
||||
- 1.68.2
|
||||
x-stainless-raw-response:
|
||||
- 'true'
|
||||
x-stainless-read-timeout:
|
||||
- '600.0'
|
||||
x-stainless-retry-count:
|
||||
- '0'
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.11.12
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
body:
|
||||
string: !!binary |
|
||||
H4sIAAAAAAAAAwAAAP//jFfbbhtHEn3XVxT4kl2DJGTZlmy9yV4nCBCs5ayxXmAdCMXumpkOe7rG
|
||||
fSFFBAbyG/t7+yWLqh7OUIoC7AsBTnfX5dSpU92/nQEsnF1cw8J0mE0/+NXbD/njD+fv/ee/X92/
|
||||
+9fN839+/Pj57e35hx8ub9+8WSzlBG9+JZOPp9aG+8FTdhzqsomEmcTq86uXry6uLi6fX+pCz5a8
|
||||
HGuHvHrJq94Ft7o4v3i5Or9aPX89nu7YGUqLa/j3GQDAb/orcQZL94trOF8ev/SUEra0uJ42ASwi
|
||||
e/mywJRcyhjyYjkvGg6Zgob+IwTeg8EArdsRILQSNmBIe4oAX8L3LqCHG/1/DZ8oZciYtgkwEqQc
|
||||
i8klkgVMiVLqKeQEaSDjGmfQ+wNYSq4NZCEz0A59wSx+BozZGTdgyN8lSFvnfVrCNvDek21pCRgs
|
||||
CKaUKRhHCfYudy4AgoQf2Xuy0ETsac9xu4ZPHSUaY0sUdwR98dkNnmAoceBEaQkuGF+sCy3kjsBZ
|
||||
ClkDlbIBN5IQhTZ3Sd3vCbeBkp6U/T1hKpEkSdnsCWMQW1yy4V52ySnZOUTeuTQaRSPmceMJGiK7
|
||||
QbNdw5fwJXxiqCwBDEBNQyZLEfIR5CW4DC6BhBCyQy8YGg7JWYqQaEcRPWzpAOQ1qnQtZp+v4dmz
|
||||
dxKdxvOOg3GJ4MdQy+U4pGfPrgUwcCffoC8pw0ariq7tcsNxj9GqEcJ0EO8lWIpCKLuG27mECVLH
|
||||
xVstINA9muwPsO+wxn8/kMlkBYzcUb/UWkolJUjlSOOEIrWl3I60nIJiP+SjafVWvMAlEAtAoHzY
|
||||
4wGyuCLvehcwk5TvANhvXFtcPiyBQiqxll327SgeOJDExkGtJewJBmxpLQheCII33rVBa63R/nSs
|
||||
9odjkBOKU8GOoVoXSREwHCNpyGpDPM2smewApYwb71InlOZYKXSCrsDhEuAUkeZDqaYju+f2k6Qi
|
||||
edphyFq6nlD8NcUvAb3nvTgXLxgAjSlRwntEbYVEexI4zl1Z/UnnO1EQW7t6Q1NjW4XvhcD3PrTY
|
||||
qkGB6Qb25P1qFoPHkAXOwMEfxkxOfG5KBvSJgdTiU8CIfm0I0HSOdmK9i1zaTqOMWLvq2CJLiIR+
|
||||
teconBoGP7b/2L07jI4sfC2UVBQajj3mBH9JxXSAaVKVVZXopSQQj4q5FLgGdWnQg6WepcOqg7+u
|
||||
YQZFY6bQYTAEPWe3qyKkuuO8d6EV6RF8p4QzgZCHUvYHBfqlAP1ZgP3b2EH/MKw8/7lsojNKUJ6Z
|
||||
5g+zBEuJvxb0Lh+k4pHSwEGlDiGNVqJaOZZoQ2BpR54HsiPwDzdwyd6FatpElyk6VKaNTh2HxxLs
|
||||
xWCSAAaKirXgMQmGHkYFXr4cqSnEqRIxuZljzBFDGjDSyH/DfV+CVLnS9ZQ/KiB2p04zQ1uc1fCd
|
||||
VJEGrJVTrF8J1j/XxjJUlbVD7ym09KdCsCEwGKkpMglNxCZPLTP1aOY/9Pt3ikB0FAzVUTQSVWZI
|
||||
5lMGJ0MBo+O0hhtrnU4a7w86OsYghkipgiFmIg/RCQEUekHUHNMY1TEYLhFbSoquElkm71ZqJrEM
|
||||
kTee+lViv5NvIm1cMvCO4r4j3x+L+6BTBcNLFYZp0N3OEKvh78fpKHDeRidCqPPOyoQabZ4MR7T0
|
||||
Val8UqoT4HUGW/qTQdWhXHeMGXssUuISjdCf7qkfPMZREHgYOOYSXJZLiPCntjet4ZZTXtVYDB8n
|
||||
6W6e8iLFJhYjk1uYfIIHtJH3MgODPZmpLrQnGi2jssbWeCV/OFLzpFXEgI4UaCL3Ovd6ufFtqUJ+
|
||||
pZcBxVBCex92LnI4yvJPKFpSbzc0L0lb7DtnuoeQSz5o5b6aMsmlb8SyYfkvukEhkd6jsKF8GPGj
|
||||
IEr2dBkaIi8d2nDMR3bTvTA2jckKt9ou1zYYR8A0THvU7vfLibVKUhg4OS3FNG3nhlJY3h5EiDgO
|
||||
LLypYKfTIfEw8Xk6jQNzGlSUHtD8v7//BwwOuHG+EmaaXnprdZuSK4tqchxaFu9TmJKkDt6j1Eo4
|
||||
VerGW6YWQTihev+AOyqjj4J5eKPFSJiUi66X/tBsl6p7eisocpmfoxnweLBOKvWht04PZEsdnOin
|
||||
C/D69JUhspdQXjqheH+ygCFwriNR3je/jCvfpheN51Y0Jj06umiEed2d5MBBXi8p87DQ1W9nAL/o
|
||||
y6k8eAwt6h3yLvOW1N2LizfV3mJ+sM2rr168GFczZ/TzwuvLi+UTBu8sZXQ+nTy+FgZNR3Y+Or/U
|
||||
sFjHJwtnJ2n/MZynbNfUXWj/H/PzgjE0ZLJ3QyTrzMOU522RftWp+/S2CWYNeCFPLGfoLjuKUgpL
|
||||
DRZfn5mLdEiZ+rvGhZbiEF19azbDXXNlX13ia7wwi7NvZ/8DAAD//wMA87a9+nkPAAA=
|
||||
headers:
|
||||
CF-RAY:
|
||||
- 9340225b9bca0110-GRU
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Encoding:
|
||||
- gzip
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Mon, 21 Apr 2025 21:57:05 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- nosniff
|
||||
access-control-expose-headers:
|
||||
- X-Request-ID
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
cf-cache-status:
|
||||
- DYNAMIC
|
||||
openai-organization:
|
||||
- crewai-iuxna1
|
||||
openai-processing-ms:
|
||||
- '9141'
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
strict-transport-security:
|
||||
- max-age=31536000; includeSubDomains; preload
|
||||
x-ratelimit-limit-requests:
|
||||
- '30000'
|
||||
x-ratelimit-limit-tokens:
|
||||
- '150000000'
|
||||
x-ratelimit-remaining-requests:
|
||||
- '29999'
|
||||
x-ratelimit-remaining-tokens:
|
||||
- '149999564'
|
||||
x-ratelimit-reset-requests:
|
||||
- 2ms
|
||||
x-ratelimit-reset-tokens:
|
||||
- 0s
|
||||
x-request-id:
|
||||
- req_0fc29337116c1d19a0543dfe5b0db291
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
version: 1
|
||||
@@ -1,11 +1,16 @@
|
||||
"""Tests for task guardrails functionality."""
|
||||
|
||||
from unittest.mock import Mock
|
||||
from unittest.mock import Mock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from crewai.task import Task
|
||||
from crewai import Agent, Task
|
||||
from crewai.llm import LLM
|
||||
from crewai.tasks.guardrail_task import GuardrailTask
|
||||
from crewai.tasks.task_output import TaskOutput
|
||||
from crewai.utilities.events import (
|
||||
GuardrailTaskCompletedEvent,
|
||||
GuardrailTaskStartedEvent,
|
||||
)
|
||||
from crewai.utilities.events.crewai_event_bus import crewai_event_bus
|
||||
|
||||
|
||||
def test_task_without_guardrail():
|
||||
@@ -22,7 +27,7 @@ def test_task_without_guardrail():
|
||||
assert result.raw == "test result"
|
||||
|
||||
|
||||
def test_task_with_successful_guardrail():
|
||||
def test_task_with_successful_guardrail_func():
|
||||
"""Test that successful guardrail validation passes transformed result."""
|
||||
|
||||
def guardrail(result: TaskOutput):
|
||||
@@ -127,3 +132,190 @@ def test_guardrail_error_in_context():
|
||||
|
||||
assert "Task failed guardrail validation" in str(exc_info.value)
|
||||
assert "Expected JSON, got string" in str(exc_info.value)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_agent():
|
||||
return Agent(role="Test Agent", goal="Test Goal", backstory="Test Backstory")
|
||||
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_guardrail_using_llm(sample_agent):
|
||||
task = Task(
|
||||
description="Test task",
|
||||
expected_output="Output",
|
||||
guardrail="Ensure the output is equal to 'good result'",
|
||||
)
|
||||
|
||||
with patch(
|
||||
"crewai.tasks.guardrail_task.GuardrailTask.__call__",
|
||||
side_effect=[(False, "bad result"), (True, "good result")],
|
||||
) as mock_guardrail:
|
||||
task.execute_sync(agent=sample_agent)
|
||||
|
||||
assert mock_guardrail.call_count == 2
|
||||
|
||||
task.guardrail = GuardrailTask(
|
||||
description="Ensure the output is equal to 'good result'",
|
||||
llm=LLM(model="gpt-4o-mini"),
|
||||
)
|
||||
|
||||
with patch(
|
||||
"crewai.tasks.guardrail_task.GuardrailTask.__call__",
|
||||
side_effect=[(False, "bad result"), (True, "good result")],
|
||||
) as mock_guardrail:
|
||||
task.execute_sync(agent=sample_agent)
|
||||
|
||||
assert mock_guardrail.call_count == 2
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def task_output():
|
||||
return TaskOutput(
|
||||
raw="Test output",
|
||||
description="Test task",
|
||||
expected_output="Output",
|
||||
agent="Test Agent",
|
||||
)
|
||||
|
||||
|
||||
def test_guardrail_task_initialization_no_llm(task_output):
|
||||
"""Test GuardrailTask initialization fails without LLM"""
|
||||
with pytest.raises(ValueError, match="Provide a valid LLM to the GuardrailTask"):
|
||||
GuardrailTask(description="Test")(task_output)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_llm():
|
||||
llm = Mock(spec=LLM)
|
||||
llm.call.return_value = """
|
||||
output = 'Sample book data'
|
||||
if isinstance(output, str):
|
||||
result = (True, output)
|
||||
else:
|
||||
result = (False, 'Invalid output format')
|
||||
print(result)
|
||||
"""
|
||||
return llm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"tool_run_output",
|
||||
[
|
||||
{
|
||||
"output": "(True, 'Valid output')",
|
||||
"expected_result": True,
|
||||
"expected_output": "Valid output",
|
||||
},
|
||||
{
|
||||
"output": "(False, 'Invalid output format')",
|
||||
"expected_result": False,
|
||||
"expected_output": "Invalid output format",
|
||||
},
|
||||
{
|
||||
"output": "Something went wrong while running the code, Invalid output format",
|
||||
"expected_result": False,
|
||||
"expected_output": "Something went wrong while running the code, Invalid output format",
|
||||
},
|
||||
{
|
||||
"output": "No result variable found",
|
||||
"expected_result": False,
|
||||
"expected_output": "No result variable found",
|
||||
},
|
||||
{
|
||||
"output": (False, "Invalid output format"),
|
||||
"expected_result": False,
|
||||
"expected_output": "Invalid output format",
|
||||
},
|
||||
],
|
||||
)
|
||||
@patch("crewai_tools.CodeInterpreterTool.run")
|
||||
def test_guardrail_task_execute_code(mock_run, mock_llm, tool_run_output, task_output):
|
||||
mock_run.return_value = tool_run_output["output"]
|
||||
|
||||
guardrail = GuardrailTask(description="Test validation", llm=mock_llm)
|
||||
|
||||
result = guardrail(task_output)
|
||||
assert result[0] == tool_run_output["expected_result"]
|
||||
assert result[1] == tool_run_output["expected_output"]
|
||||
|
||||
|
||||
@patch("crewai_tools.CodeInterpreterTool.run")
|
||||
def test_guardrail_using_additional_instructions(mock_run, mock_llm, task_output):
|
||||
mock_run.return_value = "(True, 'Valid output')"
|
||||
additional_instructions = (
|
||||
"This is an additional instruction created by the user follow it strictly"
|
||||
)
|
||||
guardrail = GuardrailTask(
|
||||
description="Test validation",
|
||||
llm=mock_llm,
|
||||
additional_instructions=additional_instructions,
|
||||
)
|
||||
|
||||
guardrail(task_output)
|
||||
|
||||
assert additional_instructions in str(mock_llm.call.call_args)
|
||||
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_guardrail_emits_events(sample_agent):
|
||||
started_guardrail = []
|
||||
completed_guardrail = []
|
||||
|
||||
with crewai_event_bus.scoped_handlers():
|
||||
|
||||
@crewai_event_bus.on(GuardrailTaskStartedEvent)
|
||||
def handle_guardrail_started(source, event):
|
||||
started_guardrail.append(
|
||||
{"guardrail": event.guardrail, "retry_count": event.retry_count}
|
||||
)
|
||||
|
||||
@crewai_event_bus.on(GuardrailTaskCompletedEvent)
|
||||
def handle_guardrail_completed(source, event):
|
||||
completed_guardrail.append(
|
||||
{
|
||||
"success": event.success,
|
||||
"result": event.result,
|
||||
"error": event.error,
|
||||
"retry_count": event.retry_count,
|
||||
}
|
||||
)
|
||||
|
||||
task = Task(
|
||||
description="Test task",
|
||||
expected_output="Output",
|
||||
guardrail="Ensure the output is equal to 'good result'",
|
||||
)
|
||||
|
||||
with patch(
|
||||
"crewai.tasks.guardrail_task.GuardrailTask.__call__",
|
||||
side_effect=[(False, "bad result"), (True, "good result")],
|
||||
):
|
||||
task.execute_sync(agent=sample_agent)
|
||||
|
||||
expected_started_events = [
|
||||
{
|
||||
"guardrail": "Ensure the output is equal to 'good result'",
|
||||
"retry_count": 0,
|
||||
},
|
||||
{
|
||||
"guardrail": "Ensure the output is equal to 'good result'",
|
||||
"retry_count": 1,
|
||||
},
|
||||
]
|
||||
expected_completed_events = [
|
||||
{
|
||||
"success": False,
|
||||
"result": None,
|
||||
"error": "bad result",
|
||||
"retry_count": 0,
|
||||
},
|
||||
{
|
||||
"success": True,
|
||||
"result": "good result",
|
||||
"error": None,
|
||||
"retry_count": 1,
|
||||
},
|
||||
]
|
||||
assert started_guardrail == expected_started_events
|
||||
assert completed_guardrail == expected_completed_events
|
||||
|
||||
Reference in New Issue
Block a user