mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-11 00:58:30 +00:00
feat: support to define a guardrail task no-code
This commit is contained in:
@@ -322,6 +322,14 @@ blog_task = Task(
|
|||||||
- On success: it returns a tuple of `(bool, Any)`. For example: `(True, validated_result)`
|
- On success: it returns a tuple of `(bool, Any)`. For example: `(True, validated_result)`
|
||||||
- On failure: it returns a tuple of `(bool, str)`. For example: `(False, "Error message explaining the failure")`
|
- On failure: it returns a tuple of `(bool, str)`. For example: `(False, "Error message explaining the failure")`
|
||||||
|
|
||||||
|
### GuardrailTask
|
||||||
|
|
||||||
|
The `GuardrailTask` class provides a sophisticated way to generate and execute validation code for task outputs. Here's how it works:
|
||||||
|
|
||||||
|
#### Code Execution
|
||||||
|
|
||||||
|
The generated code can be executed in two ways: in a Docker container (default, recommended) or in the current environment (unsafe mode).
|
||||||
|
|
||||||
### Error Handling Best Practices
|
### Error Handling Best Practices
|
||||||
|
|
||||||
1. **Structured Error Responses**:
|
1. **Structured Error Responses**:
|
||||||
@@ -750,6 +758,8 @@ Task guardrails provide a powerful way to validate, transform, or filter task ou
|
|||||||
|
|
||||||
### Basic Usage
|
### Basic Usage
|
||||||
|
|
||||||
|
#### Define your own logic to validate
|
||||||
|
|
||||||
```python Code
|
```python Code
|
||||||
from typing import Tuple, Union
|
from typing import Tuple, Union
|
||||||
from crewai import Task
|
from crewai import Task
|
||||||
@@ -769,6 +779,34 @@ task = Task(
|
|||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
#### Leverage a no-code approach for validation
|
||||||
|
|
||||||
|
```python Code
|
||||||
|
from crewai import Task
|
||||||
|
|
||||||
|
task = Task(
|
||||||
|
description="Generate JSON data",
|
||||||
|
expected_output="Valid JSON object",
|
||||||
|
guardrail="Ensure the response is a valid JSON object"
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Use custom models for code generation
|
||||||
|
|
||||||
|
```python Code
|
||||||
|
from crewai import Task
|
||||||
|
from crewai.llm import LLM
from crewai.tasks.guardrail_task import GuardrailTask
|
||||||
|
|
||||||
|
task = Task(
|
||||||
|
description="Generate JSON data",
|
||||||
|
expected_output="Valid JSON object",
|
||||||
|
guardrail=GuardrailTask(
|
||||||
|
description="Ensure the response is a valid JSON object",
|
||||||
|
llm=LLM(model="gpt-4o-mini"),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
### How Guardrails Work
|
### How Guardrails Work
|
||||||
|
|
||||||
1. **Optional Attribute**: Guardrails are an optional attribute at the task level, allowing you to add validation only where needed.
|
1. **Optional Attribute**: Guardrails are an optional attribute at the task level, allowing you to add validation only where needed.
|
||||||
|
|||||||
@@ -140,7 +140,7 @@ class Task(BaseModel):
|
|||||||
default=None,
|
default=None,
|
||||||
)
|
)
|
||||||
processed_by_agents: Set[str] = Field(default_factory=set)
|
processed_by_agents: Set[str] = Field(default_factory=set)
|
||||||
guardrail: Optional[Callable[[TaskOutput], Tuple[bool, Any]]] = Field(
|
guardrail: Optional[Union[Callable[[TaskOutput], Tuple[bool, Any]], str]] = Field(
|
||||||
default=None,
|
default=None,
|
||||||
description="Function to validate task output before proceeding to next task",
|
description="Function to validate task output before proceeding to next task",
|
||||||
)
|
)
|
||||||
@@ -157,8 +157,12 @@ class Task(BaseModel):
|
|||||||
|
|
||||||
@field_validator("guardrail")
|
@field_validator("guardrail")
|
||||||
@classmethod
|
@classmethod
|
||||||
def validate_guardrail_function(cls, v: Optional[Callable]) -> Optional[Callable]:
|
def validate_guardrail_function(
|
||||||
"""Validate that the guardrail function has the correct signature and behavior.
|
cls, v: Optional[str | Callable]
|
||||||
|
) -> Optional[str | Callable]:
|
||||||
|
"""
|
||||||
|
If v is a callable, validate that the guardrail function has the correct signature and behavior.
|
||||||
|
If v is a string, return it as is.
|
||||||
|
|
||||||
While type hints provide static checking, this validator ensures runtime safety by:
|
While type hints provide static checking, this validator ensures runtime safety by:
|
||||||
1. Verifying the function accepts exactly one parameter (the TaskOutput)
|
1. Verifying the function accepts exactly one parameter (the TaskOutput)
|
||||||
@@ -171,16 +175,16 @@ class Task(BaseModel):
|
|||||||
- Clear error messages help users debug guardrail implementation issues
|
- Clear error messages help users debug guardrail implementation issues
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
v: The guardrail function to validate
|
v: The guardrail function to validate or a string describing the guardrail task
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
The validated guardrail function
|
The validated guardrail function or a string describing the guardrail task
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
ValueError: If the function signature is invalid or return annotation
|
ValueError: If the function signature is invalid or return annotation
|
||||||
doesn't match Tuple[bool, Any]
|
doesn't match Tuple[bool, Any]
|
||||||
"""
|
"""
|
||||||
if v is not None:
|
if v is not None and callable(v):
|
||||||
sig = inspect.signature(v)
|
sig = inspect.signature(v)
|
||||||
positional_args = [
|
positional_args = [
|
||||||
param
|
param
|
||||||
@@ -408,9 +412,7 @@ class Task(BaseModel):
|
|||||||
)
|
)
|
||||||
|
|
||||||
if self.guardrail:
|
if self.guardrail:
|
||||||
guardrail_result = GuardrailResult.from_tuple(
|
guardrail_result = self._process_guardrail(task_output)
|
||||||
self.guardrail(task_output)
|
|
||||||
)
|
|
||||||
if not guardrail_result.success:
|
if not guardrail_result.success:
|
||||||
if self.retry_count >= self.max_retries:
|
if self.retry_count >= self.max_retries:
|
||||||
raise Exception(
|
raise Exception(
|
||||||
@@ -464,13 +466,52 @@ class Task(BaseModel):
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
self._save_file(content)
|
self._save_file(content)
|
||||||
crewai_event_bus.emit(self, TaskCompletedEvent(output=task_output, task=self))
|
crewai_event_bus.emit(
|
||||||
|
self, TaskCompletedEvent(output=task_output, task=self)
|
||||||
|
)
|
||||||
return task_output
|
return task_output
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.end_time = datetime.datetime.now()
|
self.end_time = datetime.datetime.now()
|
||||||
crewai_event_bus.emit(self, TaskFailedEvent(error=str(e), task=self))
|
crewai_event_bus.emit(self, TaskFailedEvent(error=str(e), task=self))
|
||||||
raise e # Re-raise the exception after emitting the event
|
raise e # Re-raise the exception after emitting the event
|
||||||
|
|
||||||
|
def _process_guardrail(self, task_output: TaskOutput) -> GuardrailResult:
    """Run this task's guardrail on ``task_output`` and wrap the verdict.

    A ``GuardrailTaskStartedEvent`` is emitted before the guardrail runs and a
    ``GuardrailTaskCompletedEvent`` after its result has been converted with
    ``GuardrailResult.from_tuple``. A string guardrail is treated as a
    description and executed through ``GuardrailTask``; anything else is
    called directly with the task output.

    Args:
        task_output: The output to validate.

    Returns:
        The ``GuardrailResult`` built from the guardrail's return value.

    Raises:
        ValueError: If no guardrail is configured on this task.
    """
    guardrail = self.guardrail
    if guardrail is None:
        raise ValueError("Guardrail is not set")

    # NOTE(review): these imports are function-local in the original as well;
    # presumably to avoid an import cycle - confirm before hoisting them.
    from crewai.utilities.events import (
        GuardrailTaskCompletedEvent,
        GuardrailTaskStartedEvent,
    )
    from crewai.utilities.events.crewai_event_bus import crewai_event_bus

    started_event = GuardrailTaskStartedEvent(
        guardrail=guardrail, retry_count=self.retry_count
    )
    crewai_event_bus.emit(self, started_event)

    if not isinstance(guardrail, str):
        raw_verdict = guardrail(task_output)
    else:
        from crewai.tasks.guardrail_task import GuardrailTask

        # A plain-text guardrail is turned into generated validation code.
        validator = GuardrailTask(description=guardrail, task=self)
        raw_verdict = validator(task_output)

    outcome = GuardrailResult.from_tuple(raw_verdict)

    completed_event = GuardrailTaskCompletedEvent(
        success=outcome.success,
        result=outcome.result,
        error=outcome.error,
        retry_count=self.retry_count,
    )
    crewai_event_bus.emit(self, completed_event)
    return outcome
|
||||||
|
|
||||||
def prompt(self) -> str:
|
def prompt(self) -> str:
|
||||||
"""Prompt the task.
|
"""Prompt the task.
|
||||||
|
|
||||||
|
|||||||
154
src/crewai/tasks/guardrail_task.py
Normal file
154
src/crewai/tasks/guardrail_task.py
Normal file
@@ -0,0 +1,154 @@
|
|||||||
|
from typing import Any, Tuple
|
||||||
|
|
||||||
|
from crewai.llm import LLM
|
||||||
|
from crewai.task import Task
|
||||||
|
from crewai.tasks.task_output import TaskOutput
|
||||||
|
from crewai.utilities.printer import Printer
|
||||||
|
|
||||||
|
|
||||||
|
class GuardrailTask:
    """A task that validates the output of another task using generated Python code.

    An LLM is asked to write Python that checks a task output against
    ``description``. The generated code is run through ``CodeInterpreterTool``
    and the ``(bool, Any)`` tuple it produces becomes the guardrail verdict.

    Args:
        description (str): The description of the validation criteria.
        task (Task, optional): The task whose output needs validation. When
            ``llm`` is not given, the LLM of this task's agent (if any) is used.
        llm (LLM, optional): The language model to use for code generation.
        unsafe_mode (bool): Forwarded unchanged to ``CodeInterpreterTool``.
            Defaults to ``False``.
        additional_instructions (str, optional): Additional instructions
            appended to the system prompt.

    Note:
        Construction never fails when no LLM can be resolved; ``generate_code``
        raises ``ValueError`` in that case.
    """

    # Substrings of the interpreter output that signal a failed run.
    _EXECUTION_ERROR_MARKERS = (
        "Something went wrong while running the code",
        # when running in unsafe mode, the final output should be stored in the result variable
        "No result variable found",
    )

    def __init__(
        self,
        description: str,
        task: Task | None = None,
        llm: LLM | None = None,
        unsafe_mode: bool = False,
        additional_instructions: str = "",
    ):
        self.description = description
        self.unsafe_mode: bool = unsafe_mode

        # Fall back to the LLM of the validated task's agent; every missing
        # link in task -> agent -> llm resolves to None.
        fallback_llm = getattr(getattr(task, "agent", None), "llm", None)
        self.llm: LLM | None = llm or fallback_llm

        self.additional_instructions = additional_instructions

    @property
    def system_instructions(self) -> str:
        """System instructions for the LLM code generation.

        Returns:
            str: Complete system instructions including security constraints,
            followed by ``additional_instructions``.
        """
        # Each bullet ends with a newline so the model sees one rule per line.
        security_instructions = (
            "- DO NOT wrap the output in markdown or use triple backticks. Return only raw Python code.\n"
            "- DO NOT use `exec`, `eval`, `compile`, `open`, `os`, `subprocess`, `socket`, `shutil`, or any other system-level modules.\n"
            "- Your code must not perform any file I/O, shell access, or dynamic code execution."
        )
        return (
            "You are an expert Python developer. "
            "You **must strictly** follow the task description, use the provided raw output as the input in your code. "
            "Your code must:\n"
            "- Return results with: print((True, data)) on success, or print((False, 'very detailed error message')) on failure. Make sure the final output is being assigned to 'result' variable.\n"
            "- Use the literal string of the task output (already included in your input) if needed.\n"
            "- Generate the code **following strictly** the task description.\n"
            "- Be valid Python 3 — executable as-is.\n"
            f"{security_instructions}\n"
            "Additional instructions (do not override the previous instructions):\n"
            f"{self.additional_instructions}"
        )

    def user_instructions(self, task_output: TaskOutput) -> str:
        """Generates user instructions for the LLM code generation.

        Args:
            task_output (TaskOutput): The output to be validated; only its
                ``raw`` attribute is read.

        Returns:
            str: Instructions for generating validation code.
        """
        # NOTE(review): the raw output is wrapped in plain single quotes, so a
        # quote or newline inside it is not escaped - confirm this is intended.
        return (
            "Based on the task description below, generate Python 3 code that validates the task output. \n"
            "Task description:\n"
            f"{self.description}\n"
            "Here is the raw output from the task: \n"
            f"'{task_output.raw}' \n"
            "Use this exact string literal inside your generated code (do not reference variables like task_output.raw).\n"
            "Now generate Python code that follows the instructions above."
        )

    def generate_code(self, task_output: TaskOutput) -> str:
        """Generates Python code for validating the task output.

        Args:
            task_output (TaskOutput): The output to be validated.

        Returns:
            str: Generated Python code for validation, exactly as returned by
            the LLM call.

        Raises:
            ValueError: If neither an explicit LLM nor a fallback LLM is set.
        """
        if self.llm is None:
            raise ValueError("Provide a valid LLM to the GuardrailTask")

        response = self.llm.call(
            messages=[
                {
                    "role": "system",
                    "content": self.system_instructions,
                },
                {
                    "role": "user",
                    "content": self.user_instructions(task_output=task_output),
                },
            ]
        )

        # Show the generated code so it can be inspected.
        printer = Printer()
        printer.print(
            content=f"The following code was generated for the guardrail task:\n{response}\n",
            color="cyan",
        )
        return response

    @classmethod
    def _parse_execution_result(cls, result: Any) -> Tuple[bool, Any]:
        """Turn the interpreter output into a ``(success, payload)`` tuple.

        Non-string results are returned unchanged. A string containing a known
        error marker, or one that is not a valid Python literal, is reported as
        a failed validation instead of raising.
        """
        import ast

        if not isinstance(result, str):
            # Only strings need marker checks and literal parsing.
            return result

        if any(marker in result for marker in cls._EXECUTION_ERROR_MARKERS):
            return False, result

        try:
            return ast.literal_eval(result)
        except (ValueError, TypeError, SyntaxError) as exc:
            # Malformed output is a validation failure, not a crash.
            return False, f"Could not parse guardrail output {result!r}: {exc}"

    def __call__(self, task_output: TaskOutput) -> Tuple[bool, Any]:
        """Executes the validation code on the task output.

        Args:
            task_output (TaskOutput): The output to be validated.

        Returns:
            Tuple[bool, Any]: A tuple containing:
                - bool: True if validation passed, False otherwise
                - Any: The validation result or error message

        Raises:
            ValueError: If no LLM is available to generate the validation code.
        """
        # Imported lazily, as in the original implementation.
        from crewai_tools import CodeInterpreterTool

        code = self.generate_code(task_output)
        result = CodeInterpreterTool(code=code, unsafe_mode=self.unsafe_mode).run()
        return self._parse_execution_result(result)
|
||||||
@@ -9,6 +9,10 @@ from .crew_events import (
|
|||||||
CrewTestCompletedEvent,
|
CrewTestCompletedEvent,
|
||||||
CrewTestFailedEvent,
|
CrewTestFailedEvent,
|
||||||
)
|
)
|
||||||
|
from .guardrail_task_events import (
|
||||||
|
GuardrailTaskCompletedEvent,
|
||||||
|
GuardrailTaskStartedEvent,
|
||||||
|
)
|
||||||
from .agent_events import (
|
from .agent_events import (
|
||||||
AgentExecutionStartedEvent,
|
AgentExecutionStartedEvent,
|
||||||
AgentExecutionCompletedEvent,
|
AgentExecutionCompletedEvent,
|
||||||
|
|||||||
@@ -23,6 +23,10 @@ from .flow_events import (
|
|||||||
MethodExecutionFinishedEvent,
|
MethodExecutionFinishedEvent,
|
||||||
MethodExecutionStartedEvent,
|
MethodExecutionStartedEvent,
|
||||||
)
|
)
|
||||||
|
from .guardrail_task_events import (
|
||||||
|
GuardrailTaskCompletedEvent,
|
||||||
|
GuardrailTaskStartedEvent,
|
||||||
|
)
|
||||||
from .llm_events import (
|
from .llm_events import (
|
||||||
LLMCallCompletedEvent,
|
LLMCallCompletedEvent,
|
||||||
LLMCallFailedEvent,
|
LLMCallFailedEvent,
|
||||||
@@ -68,4 +72,6 @@ EventTypes = Union[
|
|||||||
LLMCallCompletedEvent,
|
LLMCallCompletedEvent,
|
||||||
LLMCallFailedEvent,
|
LLMCallFailedEvent,
|
||||||
LLMStreamChunkEvent,
|
LLMStreamChunkEvent,
|
||||||
|
GuardrailTaskStartedEvent,
|
||||||
|
GuardrailTaskCompletedEvent,
|
||||||
]
|
]
|
||||||
|
|||||||
28
src/crewai/utilities/events/guardrail_task_events.py
Normal file
28
src/crewai/utilities/events/guardrail_task_events.py
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
from typing import Any, Callable, Optional, Union
|
||||||
|
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from crewai.utilities.events.base_events import BaseEvent
|
||||||
|
|
||||||
|
|
||||||
|
class GuardrailTaskStartedEvent(BaseEvent):
    """Event emitted when a guardrail task starts.

    Attributes:
        type: Event type identifier, ``"guardrail_task_started"``.
        guardrail: The guardrail about to run: either the callable itself or
            the string description it was configured with.
        retry_count: Retry count reported by the emitter when the guardrail
            starts.
    """

    type: str = "guardrail_task_started"
    guardrail: Union[str, Callable]
    retry_count: int
|
||||||
|
|
||||||
|
|
||||||
|
class GuardrailTaskCompletedEvent(BaseEvent):
    """Event emitted when a guardrail task completes.

    Attributes:
        type: Event type identifier, ``"guardrail_task_completed"``.
        success: Whether the guardrail accepted the output.
        result: Payload reported alongside the guardrail verdict.
        error: Error message of the guardrail run, if any.
        retry_count: Retry count reported by the emitter when the guardrail
            completed.
    """

    type: str = "guardrail_task_completed"
    success: bool
    result: Any
    error: Optional[str] = None
    retry_count: int
|
||||||
307
tests/cassettes/test_guardrail_emits_events.yaml
Normal file
307
tests/cassettes/test_guardrail_emits_events.yaml
Normal file
@@ -0,0 +1,307 @@
|
|||||||
|
interactions:
|
||||||
|
- request:
|
||||||
|
body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
|
||||||
|
personal goal is: Test Goal\nTo give my best complete final answer to the task
|
||||||
|
respond using the exact following format:\n\nThought: I now can give a great
|
||||||
|
answer\nFinal Answer: Your final answer must be the great and the most complete
|
||||||
|
as possible, it must be outcome described.\n\nI MUST use these formats, my job
|
||||||
|
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
|
||||||
|
is the expected criteria for your final answer: Output\nyou MUST return the
|
||||||
|
actual complete content as the final answer, not a summary.\n\nBegin! This is
|
||||||
|
VERY important to you, use the tools available and give your best Final Answer,
|
||||||
|
your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"]}'
|
||||||
|
headers:
|
||||||
|
accept:
|
||||||
|
- application/json
|
||||||
|
accept-encoding:
|
||||||
|
- gzip, deflate, zstd
|
||||||
|
connection:
|
||||||
|
- keep-alive
|
||||||
|
content-length:
|
||||||
|
- '807'
|
||||||
|
content-type:
|
||||||
|
- application/json
|
||||||
|
cookie:
|
||||||
|
- __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
|
||||||
|
_cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
|
||||||
|
host:
|
||||||
|
- api.openai.com
|
||||||
|
user-agent:
|
||||||
|
- OpenAI/Python 1.68.2
|
||||||
|
x-stainless-arch:
|
||||||
|
- arm64
|
||||||
|
x-stainless-async:
|
||||||
|
- 'false'
|
||||||
|
x-stainless-lang:
|
||||||
|
- python
|
||||||
|
x-stainless-os:
|
||||||
|
- MacOS
|
||||||
|
x-stainless-package-version:
|
||||||
|
- 1.68.2
|
||||||
|
x-stainless-raw-response:
|
||||||
|
- 'true'
|
||||||
|
x-stainless-read-timeout:
|
||||||
|
- '600.0'
|
||||||
|
x-stainless-retry-count:
|
||||||
|
- '0'
|
||||||
|
x-stainless-runtime:
|
||||||
|
- CPython
|
||||||
|
x-stainless-runtime-version:
|
||||||
|
- 3.11.12
|
||||||
|
method: POST
|
||||||
|
uri: https://api.openai.com/v1/chat/completions
|
||||||
|
response:
|
||||||
|
body:
|
||||||
|
string: !!binary |
|
||||||
|
H4sIAAAAAAAAAwAAAP//jFfNjiPHDb7PUxA6LiRhZ3Z2xpnbOHCQWSNY2J4gQbLGgKpid9NTzWoX
|
||||||
|
qyRrjQX2IXzJ6+2TBKzu1s9mDrlIrWYVi/zI7yvq9wuABfvFHSxch9n1Q1h9+z7/8MM/dv/8E3H5
|
||||||
|
fqdvdLh5//3mXx+/+/av/XeLpe2Im1/I5XnX2sV+CJQ5ymh2iTCTeb28vX57dXt1c3VTDX30FGxb
|
||||||
|
O+TVdVz1LLy6en11vXp9u7r8ZtrdRXakizv49wUAwO/10+IUT78t7uD1cn7Tkyq2tLg7LAJYpBjs
|
||||||
|
zQJVWTNKXiyPRhclk9TQH0DiDhwKtLwlQGgtbEDRHSWAD/IXFgxwX3/fwb0CwiNphvuWJC/hATrc
|
||||||
|
EmyIBDLqM3nYce5gSHHLnqUFhAkZgkQ6RFGC3GGGnigr5I6AfhvIZfLgEmdKjNDEBLHkoeQ1PHaU
|
||||||
|
qImJlmBPwOOmpoQAUyb1zFgyaOl7TPwRrQ5L8JSRA3lAhUS/Fk7k1x/kgzxI9ZEIQw+xgUyaWdol
|
||||||
|
DJgyuxIwhT1obPIOEx3NSltKGGqg7Owh9kMUkqz2SMCSIwwB9zVuaIp47EkyBsj7gfTkMGBxoXiC
|
||||||
|
Tclgp0jMELhnQyLHO4vzcg2vXv1dOFfQWdpXr+7gsWOdUWDhzBhg6FAJdiNA4nnLvpyHFxP00Zcw
|
||||||
|
xdDRMb05RfLAAqwxVPzWUE82i0JHYQASLWkqH6HrKlyzuyHFNmFvOTvbboikRC6HPUQBthh2soRd
|
||||||
|
x64DZK92msPsOkNjU1oFqrjzWB1PWwpxMPjMuSPVWrwrA+VBMrWpxnmKzX2TKUGZ465F45OlM/ZN
|
||||||
|
DCHudD2COaLXRFfU4BHYUuJmbwtHkDMlrEnBhvLOmn2LiWPRrxDWvWbqFVD8CNboArP52cMupmfI
|
||||||
|
saXcUbKmNM/ip6Z8Y3n9VD2cpvReHB3CsDyMKMdTT/hXS7icgjhps20MW1LYYmCPec7qQEsL1ti0
|
||||||
|
Pz3h0BxDir64PCE1doCOOfEpiXUgxw2Tn5nW1/DMuaWtlu6B6CyAlX2rXUzBsNpyimJbKhTXBsW9
|
||||||
|
czRktPRf6v79YAwM+1ENqkyNdTyhmJ3OCkVLXekwJYvQpGKzBxK/Kkqpls4FtoBH3rYRQz0kzqiN
|
||||||
|
JSiKGw6c9xNoY6djfTOzakQfW2TRDJuiLKR6hsoaTnKbY3VRGk69HvrlWAO27ej3VRc9DSHuKyss
|
||||||
|
iGMFLJUvn/+jIER+pMpjhAadRTxloMfzJkqRAjUNucxbCvvlobFzjEGrNIQqei35io15qCa7NCYJ
|
||||||
|
q1q1spKVHPvaP1PF4NHW1rrVTYGfCX6iQMKlr+nsaAM4DIEdjqoRE7yrwmPWd7jFczMaccdeT7VA
|
||||||
|
OJ0JiQbKbHkccrQrSWfJYbE7eUqYHZO4sY6JfHE0gjhEVZ5qHBvoSo8ClFJM6znJev/9DQXbWs2v
|
||||||
|
U9Ri8qbw7uHHe0vGlv+IHCqWBrhUxbbsckL3XOM0l1VASXUJvTk/vHcW83KKdIgpzxb7jiXbxaOH
|
||||||
|
6P4cJbMUK+GZRr5Qh3ckzyyTthslRyRngZgaxGBIRWQ+tTL5VPjPdJoHCiy0/Er+UPbgOpSWFHr0
|
||||||
|
NGu8i542RloDxx1CN1ZXNZtvaxet07Te6rmLKZa2O1a5G3/PrasZW9Ivn/+we+BM/2dxHNHEAwm/
|
||||||
|
fP7DxX7DMs8vlX7ZpLBlZ8yyZKvEYeCP9YKOQZcQcD+NI9Gu+gr1SNLAdolICx233erXMorETOk1
|
||||||
|
PORzMZ0QN62tk0CUsH/xLq3jQtA4Md9YPwnryBAbYrZzDXc4ioYWZ7VsSgA22a8zyUQo8aCYWat4
|
||||||
|
zzJ4Oi8maoqizaxSQjgxoEic3Nik+vNk+XSYTUNshxQ3+tXWRcPC2j0ZHaPYHKo5Dotq/XQB8HOd
|
||||||
|
gcvZWLsYUuyH/JTjM9XjLt++Gf0tjqP30Xr99nay5pgxHA03l9PofO7waZwV9WSMXjh0Hfnj1uPM
|
||||||
|
jcVzPDFcnKT9v+G85HtMnaX9f9wfDbVjyT8NiTy785SPyxL9Uu/Zl5cdYK4BL5TSlh09ZaZkpfDU
|
||||||
|
YAnjH4bFSJenhqWlNCQe/zU0w1Nz69/e4Dd45RYXny7+CwAA//8DAOzQwR9DDQAA
|
||||||
|
headers:
|
||||||
|
CF-RAY:
|
||||||
|
- 93402298d9980110-GRU
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
Content-Encoding:
|
||||||
|
- gzip
|
||||||
|
Content-Type:
|
||||||
|
- application/json
|
||||||
|
Date:
|
||||||
|
- Mon, 21 Apr 2025 21:57:12 GMT
|
||||||
|
Server:
|
||||||
|
- cloudflare
|
||||||
|
Transfer-Encoding:
|
||||||
|
- chunked
|
||||||
|
X-Content-Type-Options:
|
||||||
|
- nosniff
|
||||||
|
access-control-expose-headers:
|
||||||
|
- X-Request-ID
|
||||||
|
alt-svc:
|
||||||
|
- h3=":443"; ma=86400
|
||||||
|
cf-cache-status:
|
||||||
|
- DYNAMIC
|
||||||
|
openai-organization:
|
||||||
|
- crewai-iuxna1
|
||||||
|
openai-processing-ms:
|
||||||
|
- '6385'
|
||||||
|
openai-version:
|
||||||
|
- '2020-10-01'
|
||||||
|
strict-transport-security:
|
||||||
|
- max-age=31536000; includeSubDomains; preload
|
||||||
|
x-ratelimit-limit-requests:
|
||||||
|
- '30000'
|
||||||
|
x-ratelimit-limit-tokens:
|
||||||
|
- '150000000'
|
||||||
|
x-ratelimit-remaining-requests:
|
||||||
|
- '29999'
|
||||||
|
x-ratelimit-remaining-tokens:
|
||||||
|
- '149999832'
|
||||||
|
x-ratelimit-reset-requests:
|
||||||
|
- 2ms
|
||||||
|
x-ratelimit-reset-tokens:
|
||||||
|
- 0s
|
||||||
|
x-request-id:
|
||||||
|
- req_2a19c29e1e9dd766289937937418044a
|
||||||
|
status:
|
||||||
|
code: 200
|
||||||
|
message: OK
|
||||||
|
- request:
|
||||||
|
body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
|
||||||
|
personal goal is: Test Goal\nTo give my best complete final answer to the task
|
||||||
|
respond using the exact following format:\n\nThought: I now can give a great
|
||||||
|
answer\nFinal Answer: Your final answer must be the great and the most complete
|
||||||
|
as possible, it must be outcome described.\n\nI MUST use these formats, my job
|
||||||
|
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
|
||||||
|
is the expected criteria for your final answer: Output\nyou MUST return the
|
||||||
|
actual complete content as the final answer, not a summary.\n\nThis is the context
|
||||||
|
you''re working with:\n### Previous attempt failed validation: bad result\n\n\n###
|
||||||
|
Previous result:\nAs a Test Agent, I have been tasked with providing a complete
|
||||||
|
response that meets the expected criteria for output. Therefore, here is the
|
||||||
|
full content without summarization, detailed as required.\n\nIn the realm of
|
||||||
|
testing, particularly software testing, several critical components come into
|
||||||
|
play. The fundamental types of testing include but are not limited to:\n\n1.
|
||||||
|
**Unit Testing**: This is the initial phase where individual components or modules
|
||||||
|
of the software are tested in isolation. Unit tests help ensure that each part
|
||||||
|
of the program functions correctly on its own, which aids in catching bugs early
|
||||||
|
in the development process.\n\n2. **Integration Testing**: After unit testing,
|
||||||
|
integration testing follows. This phase focuses on verifying the interaction
|
||||||
|
between various components or systems and ensuring that they work together as
|
||||||
|
intended.\n\n3. **System Testing**: Once the integrated components have been
|
||||||
|
tested, system testing involves validating the complete and fully integrated
|
||||||
|
software product. This ensures that it meets the specified requirements and
|
||||||
|
works as expected in a real-world environment.\n\n4. **Acceptance Testing**:
|
||||||
|
This is typically the final phase of testing and is usually carried out by end-users
|
||||||
|
or clients. The goal is to validate the usability and functionality of the system
|
||||||
|
against business requirements. Acceptance testing confirms that the software
|
||||||
|
is ready for deployment and meets the user\u2019s needs.\n\nTo facilitate these
|
||||||
|
testing processes effectively, various tools are leveraged. These tools can
|
||||||
|
include:\n\n- **Automated Testing Tools**: Tools like Selenium for web applications
|
||||||
|
or JUnit for Java applications allow testers to automate repetitive testing
|
||||||
|
tasks, which increases efficiency and reduces the possibility of human error.\n\n-
|
||||||
|
**Test Management Tools**: Tools such as JIRA or TestRail are essential for
|
||||||
|
tracking test progress, managing test cases, and reporting testing outcomes.\n\n-
|
||||||
|
**Continuous Integration Tools**: Tools like Jenkins help in automating the
|
||||||
|
process of running tests as part of the development pipeline, ensuring that
|
||||||
|
any changes made in the codebase are continuously tested.\n\nIn conclusion,
|
||||||
|
thorough testing through various stages\u2014unit, integration, system, and
|
||||||
|
acceptance\u2014combined with the strategic use of specialized tools, lays the
|
||||||
|
foundation for delivering high-quality software. It ensures that the product
|
||||||
|
not only functions correctly but also meets user expectations, paving the way
|
||||||
|
for successful implementations and satisfied clients.\n\n\nTry again, making
|
||||||
|
sure to address the validation error.\n\nBegin! This is VERY important to you,
|
||||||
|
use the tools available and give your best Final Answer, your job depends on
|
||||||
|
it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"]}'
|
||||||
|
headers:
|
||||||
|
accept:
|
||||||
|
- application/json
|
||||||
|
accept-encoding:
|
||||||
|
- gzip, deflate, zstd
|
||||||
|
connection:
|
||||||
|
- keep-alive
|
||||||
|
content-length:
|
||||||
|
- '3539'
|
||||||
|
content-type:
|
||||||
|
- application/json
|
||||||
|
cookie:
|
||||||
|
- __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
|
||||||
|
_cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
|
||||||
|
host:
|
||||||
|
- api.openai.com
|
||||||
|
user-agent:
|
||||||
|
- OpenAI/Python 1.68.2
|
||||||
|
x-stainless-arch:
|
||||||
|
- arm64
|
||||||
|
x-stainless-async:
|
||||||
|
- 'false'
|
||||||
|
x-stainless-lang:
|
||||||
|
- python
|
||||||
|
x-stainless-os:
|
||||||
|
- MacOS
|
||||||
|
x-stainless-package-version:
|
||||||
|
- 1.68.2
|
||||||
|
x-stainless-raw-response:
|
||||||
|
- 'true'
|
||||||
|
x-stainless-read-timeout:
|
||||||
|
- '600.0'
|
||||||
|
x-stainless-retry-count:
|
||||||
|
- '0'
|
||||||
|
x-stainless-runtime:
|
||||||
|
- CPython
|
||||||
|
x-stainless-runtime-version:
|
||||||
|
- 3.11.12
|
||||||
|
method: POST
|
||||||
|
uri: https://api.openai.com/v1/chat/completions
|
||||||
|
response:
|
||||||
|
body:
|
||||||
|
string: !!binary |
|
||||||
|
H4sIAAAAAAAAA3RXTY/cuBG9768ojA9ZG90Ne3YSB3ObDJxgjMTe2G0ESHxhkyWpdiiWzI9uy/vn
|
||||||
|
gypKavXYexn0SGSx+F69V6XffwK4Ind1C1e2M9n2g9/+7X3+93+af7rxtw+PN2OPx5tCzc2bb+Px
|
||||||
|
v+/91UZ28OE3tHnetbPcDx4zcaivbUSTUaK+en3z5+vX13/55Vpf9OzQy7Z2yNsb3vYUaHv98vpm
|
||||||
|
+/L19tVfp90dk8V0dQv/+wkA4Hf9K3kGh1+vbuHlZn7SY0qmxavbZRHAVWQvT65MSpSyCflqc35p
|
||||||
|
OWQMmvoDBD6BNQFaOiIYaCVtMCGdMAJ8Dn+nYDzc6f+38BAgdwgRje+BG0jc5JOJCBlTptBuwIAA
|
||||||
|
EbHDkDTiMEQ2tgNKgClhyGQ8ZAYMqcjGTo4bBk/WCHgJesQMX4rxlEeQ3J2JLoEJDpriG/IeSsII
|
||||||
|
+HVAm+umHew7hIa95xOFFrhkTwGTZtuU4EyPIRuvyXHAkJOkL2+fXgE8NWhH63ED2A+dSfRNHh9N
|
||||||
|
JC4JPB7R193TBslMIq2vxz5ByeTpG8rLyKXtuGRdN0S2mNLuc/gcnj17BvtxQA34cU5lXyPLglc7
|
||||||
|
ePHiU6A8P3zx4hb2HSXI44DrNLKJLeYEFBwdyRXj5ea2gsoReswduwQnyh1VHi07PJiET/lAIazI
|
||||||
|
oQPGhmOfwKQJcHQ7eMhCp2JLMWUQrCUVhw2GhGBaQyFlOJQ2bQCDOXjJ0Al0PGBMcqA12XaAMXJM
|
||||||
|
cMCG9fQVIUPkNgqoutybjFHqocW0A0VEbp5AuRsHssb7EUzJ3IvwoCQ5s4mmxxPHxwSeHhHe6s6G
|
||||||
|
I7w1RyO4vFue7N692W/AzEX0pZB9hCNGaqbiBFeiQt3hfBmpqwtGr4Wwh5CxjXXPire7Ru5Q5txV
|
||||||
|
MJT/lMDGYidd6HkjdHwCR02DUQ7o2RWPwm3GaGxWEoEDggmcO4w7WB+5VKaAJ/idOpRVUv4HCujW
|
||||||
|
OhBwIHM7r4gRbfbjBshJOTejRNKDG2NROba56pFSKpqVIjInJxkcMJ8Qgz6TrISPQFmFSglEVqi2
|
||||||
|
Q+HI/ohw4NxBb4KUrYQ+87jUN9ou0Bc5sFJbVaas/sop9yYoi3e/Pix7OCr87/5xZnztNUrYL0LY
|
||||||
|
xzFl7L/TWM1zyjGdU1HtqN+jW0cUmRg4dexxB1PMeRMejS8mT6aEwW0zbzE4SHVdGtAulaaySXO1
|
||||||
|
mXypjAN2RvJZiRKMjZzSYlPJYpCflSjLwdHklA8ZMEj2Wh2LR0y4Bw7b1aNzmQbri5N0JkswweIG
|
||||||
|
SjIHEqfe6O6EtkTx7QUphraYaEJGPF9E6irifHFxBMUl9cy586MUFOVa7cGhAwxHihxEa0rZjVB2
|
||||||
|
Zy0OWdJY03bPwRUF5OwJh1Hhlq6hTpiyecSOvcOYNmDOceasHWaMvTaQWTmKf81XGlQlcbXVRsoY
|
||||||
|
yUy6kBbpRq06h4PnUXOvRdXQCtm5xBIMdGRpUbKnmsCPyZ+aYIKDyACTnPWlUMRe5Szna38MiC7t
|
||||||
|
4J77Xgpz6sMqV6ES4ZOs+h5F+PnT3f65xnmvzNRK+NHC93f750sbe7O0v/n9XgQqr/cM2DRkCUP2
|
||||||
|
o8jctNXsFxSqg6LwoTWMedQBQ0RhpjaqehcIsBdI0d1K8K3UwuIWF2er59ZXIk46G62AjKEzwc56
|
||||||
|
TgOKioKDiJ6mql71110NCanYToT3ET0GKr2GOuHhcoiZ7QsiDpgpyyik7WoDp448rs3rbhgkjrjh
|
||||||
|
AYU7pzF7PshCMwxnFSZqg3qE4kj9EPmoFygxmMhFJhHqsVaBsbZEY8fdjJJgA/9S8LVtLRjtz8m8
|
||||||
|
ffhwN9vmB0Ne8ZaRIg0Ykjl41OQ4tibUwWjxt6bhWB0eE8LgTa6jQ47GPuqypaVvlhqQp1YU4NgW
|
||||||
|
HdIEwOomjbHCgsBo2Xtz4Km/mZ71YCNi7A8q48UurTdqQQqBp1Zt4+kM9qTuFojuOWQKRRz0oofP
|
||||||
|
SP3B+5/vH55XSjdLgbzF8Eh19LqnaD3eP2zOdbFOghudxMB2JrQTeSuN0OqgKV+xtFhCmPFPdWQT
|
||||||
|
7msoKXXue8o6r9Vepr3ZInToh3Q5f4cRAp7UeXNkV6yOCE5YwQSBMxwimkfAr1RTPrcI9f6UI5re
|
||||||
|
U/jRdCQDRuP5tDjFPYsDJeKg5jA10/M3wx+P5BedSwaKzRqdzeTQm7n8n/j6pqKUMKpoDDi9jc0w
|
||||||
|
Yq7ji+UQaj8dShxYmz849CSOHFroqO22y8fJlKeYbBk8ujqWqZtkaWkt2YvpgJsfzTW1bFQJ/Vmc
|
||||||
|
iX2ZJgG5zf3DslCH/461ns/fV5XVeWabjMxOv0cYOE/uPA1t08S97k6ffCbJTYY/bGTMm21L9a0F
|
||||||
|
k37QkaQ+OIixa2dcjQ8XnelQMuBXK23p+y+4Oj6UYRAPEQ1JmTfFT9NBDcclW+5xniIdeWpX8zd4
|
||||||
|
M9bG3KoX6mQrblUd3V9+W7hi/w8AAP//jJhBbsQgDEX3OUXEASp12s70MiPkgkndMoDALHP3CsIE
|
||||||
|
0s6i6weOfyKI/+emZkFXR5BZ5cT+VnwGMCXThtnS2v6zba09jYY6oskJiql32doBgHO+CSxW/trI
|
||||||
|
upt365cQ/Uf6tVUYcpQ+ZURI3hWjntgHUek6zfO1hgT54PtFiP4WWLL/xvq480sLCUTPJgb6eqdc
|
||||||
|
xo4Onk+XOzlUlBoZyKYhaBCqTBS67+2pBGRNfgDToPtvP49qb9rJLf8p30E99ahliKhJHTX3ZRG/
|
||||||
|
6jF/vGx/z7VhUS8MhZIJY/kWGg1ku0UqYrtypCG3YAyRtlzFBGku+u0M73BSYlqnHwAAAP//AwDv
|
||||||
|
DD9WZRIAAA==
|
||||||
|
headers:
|
||||||
|
CF-RAY:
|
||||||
|
- 934022c27c860110-GRU
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
Content-Encoding:
|
||||||
|
- gzip
|
||||||
|
Content-Type:
|
||||||
|
- application/json
|
||||||
|
Date:
|
||||||
|
- Mon, 21 Apr 2025 21:57:22 GMT
|
||||||
|
Server:
|
||||||
|
- cloudflare
|
||||||
|
Transfer-Encoding:
|
||||||
|
- chunked
|
||||||
|
X-Content-Type-Options:
|
||||||
|
- nosniff
|
||||||
|
access-control-expose-headers:
|
||||||
|
- X-Request-ID
|
||||||
|
alt-svc:
|
||||||
|
- h3=":443"; ma=86400
|
||||||
|
cf-cache-status:
|
||||||
|
- DYNAMIC
|
||||||
|
openai-organization:
|
||||||
|
- crewai-iuxna1
|
||||||
|
openai-processing-ms:
|
||||||
|
- '9187'
|
||||||
|
openai-version:
|
||||||
|
- '2020-10-01'
|
||||||
|
strict-transport-security:
|
||||||
|
- max-age=31536000; includeSubDomains; preload
|
||||||
|
x-ratelimit-limit-requests:
|
||||||
|
- '30000'
|
||||||
|
x-ratelimit-limit-tokens:
|
||||||
|
- '150000000'
|
||||||
|
x-ratelimit-remaining-requests:
|
||||||
|
- '29999'
|
||||||
|
x-ratelimit-remaining-tokens:
|
||||||
|
- '149999158'
|
||||||
|
x-ratelimit-reset-requests:
|
||||||
|
- 2ms
|
||||||
|
x-ratelimit-reset-tokens:
|
||||||
|
- 0s
|
||||||
|
x-request-id:
|
||||||
|
- req_94bb40dead4c4e9c7fa12de3bfb636b7
|
||||||
|
status:
|
||||||
|
code: 200
|
||||||
|
message: OK
|
||||||
|
version: 1
|
||||||
522
tests/cassettes/test_guardrail_using_llm.yaml
Normal file
522
tests/cassettes/test_guardrail_using_llm.yaml
Normal file
@@ -0,0 +1,522 @@
|
|||||||
|
interactions:
|
||||||
|
- request:
|
||||||
|
body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
|
||||||
|
personal goal is: Test Goal\nTo give my best complete final answer to the task
|
||||||
|
respond using the exact following format:\n\nThought: I now can give a great
|
||||||
|
answer\nFinal Answer: Your final answer must be the great and the most complete
|
||||||
|
as possible, it must be outcome described.\n\nI MUST use these formats, my job
|
||||||
|
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
|
||||||
|
is the expected criteria for your final answer: Output\nyou MUST return the
|
||||||
|
actual complete content as the final answer, not a summary.\n\nBegin! This is
|
||||||
|
VERY important to you, use the tools available and give your best Final Answer,
|
||||||
|
your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"]}'
|
||||||
|
headers:
|
||||||
|
accept:
|
||||||
|
- application/json
|
||||||
|
accept-encoding:
|
||||||
|
- gzip, deflate, zstd
|
||||||
|
connection:
|
||||||
|
- keep-alive
|
||||||
|
content-length:
|
||||||
|
- '807'
|
||||||
|
content-type:
|
||||||
|
- application/json
|
||||||
|
host:
|
||||||
|
- api.openai.com
|
||||||
|
user-agent:
|
||||||
|
- OpenAI/Python 1.68.2
|
||||||
|
x-stainless-arch:
|
||||||
|
- arm64
|
||||||
|
x-stainless-async:
|
||||||
|
- 'false'
|
||||||
|
x-stainless-lang:
|
||||||
|
- python
|
||||||
|
x-stainless-os:
|
||||||
|
- MacOS
|
||||||
|
x-stainless-package-version:
|
||||||
|
- 1.68.2
|
||||||
|
x-stainless-raw-response:
|
||||||
|
- 'true'
|
||||||
|
x-stainless-read-timeout:
|
||||||
|
- '600.0'
|
||||||
|
x-stainless-retry-count:
|
||||||
|
- '0'
|
||||||
|
x-stainless-runtime:
|
||||||
|
- CPython
|
||||||
|
x-stainless-runtime-version:
|
||||||
|
- 3.11.12
|
||||||
|
method: POST
|
||||||
|
uri: https://api.openai.com/v1/chat/completions
|
||||||
|
response:
|
||||||
|
body:
|
||||||
|
string: !!binary |
|
||||||
|
H4sIAAAAAAAAAwAAAP//jFTBbuQ2DL3PVxA6e4KJm+zszq0ttkBORYuiPbSLASPRNndlSRHpmcwu
|
||||||
|
8u+FZCcz2ebQi2H78ZGPj6K+rQAMO7MDYwdUOya//ulX/a3tf/+Z3f3jX/xxdGGDfz60p7uvx+mj
|
||||||
|
aQoj3n8mq8+sKxvH5Ek5hhm2mVCpZL3e3ty22/bdpq3AGB35QuuTrm/ieuTA63bT3qw32/X1+4U9
|
||||||
|
RLYkZgd/rwAAvtVn0RkcPZodbJrnPyOJYE9m9xIEYHL05Y9BERbFoKY5gzYGpVCl30GIR7AYoOcD
|
||||||
|
AUJfZAMGOVIG+Cf8wgE9/Fi/d/DHQKAoXwAVBgwOMj1MnEkAoRiQaaAgNVNw4EiRPZUoSTEIgQ6o
|
||||||
|
MBKpgA4E9JjIKjmwmZUyI3QxQ5w0TXoFdwG6yXfsPYcedGB5LjdS0AZYgQVIhIIyetAINh4oA3oP
|
||||||
|
mTwdMCiglBpSBaUcD+wIllmVl+pEAxRkynMZ1KqNQxfziGWgpYz1hLkpBMtCTU2H1k4ZlfwJ0Llc
|
||||||
|
lMx9BSInELv6Ufy6Ks5l6mKmBu7gyN6XNj0HWgi2TDGfqrQYKKg0gK6QSl8YTtBP7Kgw5LkPN8uo
|
||||||
|
2umsvKsjm10s0nWIOU79UIOP5P06ZSqmlQRH1iFOCnFk1WIAjynmcmKW8QnEDDKNI2b+Ojv0ypzS
|
||||||
|
GgtgSjmiHebe+gkzBqVyogbuh/XDhJ719N1JEFSWjhfXFkPccixqdrm6PLiZukmwLE+YvL8AMIS4
|
||||||
|
EMrKfFqQp5cl8bFPOd7Ld1TTcWAZ9plQYigLIRqTqejTCuBTXcbp1X6ZlOOYdK/xC9Vy17c/zPnM
|
||||||
|
+Q64QK8/LKhGRX8G2m3bvJFwv9h+sc/Goh3Inann5cfJcbwAVhdt/1fOW7nn1jn0/yf9GbCWkpLb
|
||||||
|
p0yO7euWz2GZPtcFfzvsxeYq2AjlA1vaK1Muo3DU4eTnm8vISZTGfcehp5wyz9dXl/bd1t2+w/fY
|
||||||
|
WrN6Wv0LAAD//wMAAfXtOswFAAA=
|
||||||
|
headers:
|
||||||
|
CF-RAY:
|
||||||
|
- 934022059c2c0110-GRU
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
Content-Encoding:
|
||||||
|
- gzip
|
||||||
|
Content-Type:
|
||||||
|
- application/json
|
||||||
|
Date:
|
||||||
|
- Mon, 21 Apr 2025 21:56:45 GMT
|
||||||
|
Server:
|
||||||
|
- cloudflare
|
||||||
|
Set-Cookie:
|
||||||
|
- __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
|
||||||
|
path=/; expires=Mon, 21-Apr-25 22:26:45 GMT; domain=.api.openai.com; HttpOnly;
|
||||||
|
Secure; SameSite=None
|
||||||
|
- _cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000;
|
||||||
|
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
|
||||||
|
Transfer-Encoding:
|
||||||
|
- chunked
|
||||||
|
X-Content-Type-Options:
|
||||||
|
- nosniff
|
||||||
|
access-control-expose-headers:
|
||||||
|
- X-Request-ID
|
||||||
|
alt-svc:
|
||||||
|
- h3=":443"; ma=86400
|
||||||
|
cf-cache-status:
|
||||||
|
- DYNAMIC
|
||||||
|
openai-organization:
|
||||||
|
- crewai-iuxna1
|
||||||
|
openai-processing-ms:
|
||||||
|
- '2377'
|
||||||
|
openai-version:
|
||||||
|
- '2020-10-01'
|
||||||
|
strict-transport-security:
|
||||||
|
- max-age=31536000; includeSubDomains; preload
|
||||||
|
x-ratelimit-limit-requests:
|
||||||
|
- '30000'
|
||||||
|
x-ratelimit-limit-tokens:
|
||||||
|
- '150000000'
|
||||||
|
x-ratelimit-remaining-requests:
|
||||||
|
- '29999'
|
||||||
|
x-ratelimit-remaining-tokens:
|
||||||
|
- '149999832'
|
||||||
|
x-ratelimit-reset-requests:
|
||||||
|
- 2ms
|
||||||
|
x-ratelimit-reset-tokens:
|
||||||
|
- 0s
|
||||||
|
x-request-id:
|
||||||
|
- req_f39581c88a83855cf77c06098b787948
|
||||||
|
status:
|
||||||
|
code: 200
|
||||||
|
message: OK
|
||||||
|
- request:
|
||||||
|
body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
|
||||||
|
personal goal is: Test Goal\nTo give my best complete final answer to the task
|
||||||
|
respond using the exact following format:\n\nThought: I now can give a great
|
||||||
|
answer\nFinal Answer: Your final answer must be the great and the most complete
|
||||||
|
as possible, it must be outcome described.\n\nI MUST use these formats, my job
|
||||||
|
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
|
||||||
|
is the expected criteria for your final answer: Output\nyou MUST return the
|
||||||
|
actual complete content as the final answer, not a summary.\n\nThis is the context
|
||||||
|
you''re working with:\n### Previous attempt failed validation: bad result\n\n\n###
|
||||||
|
Previous result:\nThe task at hand requires a comprehensive and detailed response
|
||||||
|
that meets the expected criteria for output. In fulfilling this requirement,
|
||||||
|
it is essential to cover all relevant aspects and provide complete content,
|
||||||
|
ensuring that the information is clear, concise, and accurately addresses the
|
||||||
|
needs of the task. Therefore, I will outline the necessary components, adhere
|
||||||
|
to any guidelines provided, and ensure that the final output is thorough and
|
||||||
|
well-presented, without omitting important details or summarizing the information.
|
||||||
|
This approach will guarantee a high-quality response that satisfies the outlined
|
||||||
|
expectations.\n\n\nTry again, making sure to address the validation error.\n\nBegin!
|
||||||
|
This is VERY important to you, use the tools available and give your best Final
|
||||||
|
Answer, your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop":
|
||||||
|
["\nObservation:"]}'
|
||||||
|
headers:
|
||||||
|
accept:
|
||||||
|
- application/json
|
||||||
|
accept-encoding:
|
||||||
|
- gzip, deflate, zstd
|
||||||
|
connection:
|
||||||
|
- keep-alive
|
||||||
|
content-length:
|
||||||
|
- '1619'
|
||||||
|
content-type:
|
||||||
|
- application/json
|
||||||
|
cookie:
|
||||||
|
- __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
|
||||||
|
_cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
|
||||||
|
host:
|
||||||
|
- api.openai.com
|
||||||
|
user-agent:
|
||||||
|
- OpenAI/Python 1.68.2
|
||||||
|
x-stainless-arch:
|
||||||
|
- arm64
|
||||||
|
x-stainless-async:
|
||||||
|
- 'false'
|
||||||
|
x-stainless-lang:
|
||||||
|
- python
|
||||||
|
x-stainless-os:
|
||||||
|
- MacOS
|
||||||
|
x-stainless-package-version:
|
||||||
|
- 1.68.2
|
||||||
|
x-stainless-raw-response:
|
||||||
|
- 'true'
|
||||||
|
x-stainless-read-timeout:
|
||||||
|
- '600.0'
|
||||||
|
x-stainless-retry-count:
|
||||||
|
- '0'
|
||||||
|
x-stainless-runtime:
|
||||||
|
- CPython
|
||||||
|
x-stainless-runtime-version:
|
||||||
|
- 3.11.12
|
||||||
|
method: POST
|
||||||
|
uri: https://api.openai.com/v1/chat/completions
|
||||||
|
response:
|
||||||
|
body:
|
||||||
|
string: !!binary |
|
||||||
|
H4sIAAAAAAAAA4xWTW8cNwy9+1cQczTWi83GjhPf3CQFjKJI27hoizowuBJnhrZGUkVq15sg/72Q
|
||||||
|
ZvbDaQ69GOshRb1HPpL6cgLQsG2uoDE9qhmiO/vhg/56YR5uPy4uzfvFm5//ePfnb8vPf70y4Z+f
|
||||||
|
PjSzciKsHsjo7tTchCE6Ug5+NJtEqFSivrg8v1heLl8tLqphCJZcOdZFPTsPZwN7Plsuludni8uz
|
||||||
|
F6+n031gQ9Jcwd8nAABf6t+C01t6aq5gMdt9GUgEO2qu9k4ATQqufGlQhEXRazM7GE3wSr5CvwEf
|
||||||
|
NmDQQ8drAoSuwAb0sqEEcOd/ZI8Oruv/V3DbEyjKI6BCj96CJ0MirKgkgKB9SCF3PRSbJUV2ZCGR
|
||||||
|
xOCFQAOQl5wIarYYvSHYsPagPQE9RTJKFkxipcQIbUgQssas83p1TDxg2sKY+gKYpQSNKazZEvig
|
||||||
|
8JBFAUHyUF1XWWvwqTzlRyUPiVypTzmuE6sZoLWp8PEdoHPFh9boFVAKNKm00PaUiocGQL/dXW6h
|
||||||
|
y2zJsSeZw52/87cB0PRMawLtWWb1njY4FzbleEEUPHkV2LBzsCJgb1wuodhX513irmq8F3M4Pb3x
|
||||||
|
moLNpgjt9PQK3jrC5LZgqWVP9VTMKQYhCO2eWQUesrqdj3DnuWVTKxBaKMDXIy2syBL15KVqotZ+
|
||||||
|
fueh6GFZQLzbVfbao9sKS0Hy/gmHEp7Q9FPC9hBCZLMXh9vOCtOQYkio5coWjcoMLCrOKtR94g0K
|
||||||
|
gWi2TAIhAT1hqWOtuuQYQ9JaKSkKHEoy5/C+AvCdIxiKGFZVWi6ksdo7sSC02TnI3lIqLWILkgmw
|
||||||
|
5KowGFC1cr/zLwvxtw4T67Zi/CWRYZnK8LsQmFKKaorVRODQdxm7kvLKPExiPG4IE/yatsC+DWnA
|
||||||
|
Ulagth3l7bZzuF4HtoDDiru8u3pqIu1Rx3THwF5LM2BSNnlUNntA2OB29GMBQtmOV071tZXZeWUW
|
||||||
|
ivZ2dD7W7uHPo1geaQst1wSNLcBeuOtVwCbceGhTGKrjvuPHfI9sjjUwg0SVqakN1BPwUGq4k2H9
|
||||||
|
cpSJmEjIFzL1WpWdMgwdN26lcVFoXFvL5SQ6+I0k5GSoivOmBYzRscGVo9mu0SBRS4m8GcXl2D9W
|
||||||
|
ZbU5aU8JEmFVxYB1HjkZU1nG5V5FhwvrZHnSEmlKELCfUI5CErAsJovQmPnbURZ1PIBoykZzUSnG
|
||||||
|
mAKafgY342zoMib0SkW1B/EULAORSr1iam87zdGaQIFNz44mtGN3Jy6Bd6Ua+60PjkXZwJpp83xw
|
||||||
|
jIN+Djc6QimNXSeHc1vAKs6YaM0hC0TWFp0TkFxnAKyxy0dtMGrn21FcB2Oi1XbU9YhSUFnKXAip
|
||||||
|
ig/dbhMcb7JEbRYs29Rn544M6H2YUlB26KfJ8nW/NV3oYgor+eZo07Jn6e8ToQRfNqRoiE21fj0B
|
||||||
|
+FS3c362cJuYwhD1XsMj1euWb5ZjvObwKDhYXy5eT1YNiu5geLWYlvrzgPdjqeRowTcGTU/2cPTw
|
||||||
|
GsBsORwZTo5o/xfO92KP1Nl3/yf8wWAMRSV7HxNZNs8pH9wSPdQd/323fZor4EYordnQvTKlUgpL
|
||||||
|
LWY3PmUa2YrScN+y7yjFxON7po337aW9eIWvcWmak68n/wIAAP//AwA0/RJL3QkAAA==
|
||||||
|
headers:
|
||||||
|
CF-RAY:
|
||||||
|
- 93402216690b0110-GRU
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
Content-Encoding:
|
||||||
|
- gzip
|
||||||
|
Content-Type:
|
||||||
|
- application/json
|
||||||
|
Date:
|
||||||
|
- Mon, 21 Apr 2025 21:56:49 GMT
|
||||||
|
Server:
|
||||||
|
- cloudflare
|
||||||
|
Transfer-Encoding:
|
||||||
|
- chunked
|
||||||
|
X-Content-Type-Options:
|
||||||
|
- nosniff
|
||||||
|
access-control-expose-headers:
|
||||||
|
- X-Request-ID
|
||||||
|
alt-svc:
|
||||||
|
- h3=":443"; ma=86400
|
||||||
|
cf-cache-status:
|
||||||
|
- DYNAMIC
|
||||||
|
openai-organization:
|
||||||
|
- crewai-iuxna1
|
||||||
|
openai-processing-ms:
|
||||||
|
- '4451'
|
||||||
|
openai-version:
|
||||||
|
- '2020-10-01'
|
||||||
|
strict-transport-security:
|
||||||
|
- max-age=31536000; includeSubDomains; preload
|
||||||
|
x-ratelimit-limit-requests:
|
||||||
|
- '30000'
|
||||||
|
x-ratelimit-limit-tokens:
|
||||||
|
- '150000000'
|
||||||
|
x-ratelimit-remaining-requests:
|
||||||
|
- '29999'
|
||||||
|
x-ratelimit-remaining-tokens:
|
||||||
|
- '149999631'
|
||||||
|
x-ratelimit-reset-requests:
|
||||||
|
- 2ms
|
||||||
|
x-ratelimit-reset-tokens:
|
||||||
|
- 0s
|
||||||
|
x-request-id:
|
||||||
|
- req_8383a16d5f5b7f53d659bebf481ba936
|
||||||
|
status:
|
||||||
|
code: 200
|
||||||
|
message: OK
|
||||||
|
- request:
|
||||||
|
body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
|
||||||
|
personal goal is: Test Goal\nTo give my best complete final answer to the task
|
||||||
|
respond using the exact following format:\n\nThought: I now can give a great
|
||||||
|
answer\nFinal Answer: Your final answer must be the great and the most complete
|
||||||
|
as possible, it must be outcome described.\n\nI MUST use these formats, my job
|
||||||
|
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
|
||||||
|
is the expected criteria for your final answer: Output\nyou MUST return the
|
||||||
|
actual complete content as the final answer, not a summary.\n\nBegin! This is
|
||||||
|
VERY important to you, use the tools available and give your best Final Answer,
|
||||||
|
your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"]}'
|
||||||
|
headers:
|
||||||
|
accept:
|
||||||
|
- application/json
|
||||||
|
accept-encoding:
|
||||||
|
- gzip, deflate, zstd
|
||||||
|
connection:
|
||||||
|
- keep-alive
|
||||||
|
content-length:
|
||||||
|
- '807'
|
||||||
|
content-type:
|
||||||
|
- application/json
|
||||||
|
cookie:
|
||||||
|
- __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
|
||||||
|
_cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
|
||||||
|
host:
|
||||||
|
- api.openai.com
|
||||||
|
user-agent:
|
||||||
|
- OpenAI/Python 1.68.2
|
||||||
|
x-stainless-arch:
|
||||||
|
- arm64
|
||||||
|
x-stainless-async:
|
||||||
|
- 'false'
|
||||||
|
x-stainless-lang:
|
||||||
|
- python
|
||||||
|
x-stainless-os:
|
||||||
|
- MacOS
|
||||||
|
x-stainless-package-version:
|
||||||
|
- 1.68.2
|
||||||
|
x-stainless-raw-response:
|
||||||
|
- 'true'
|
||||||
|
x-stainless-read-timeout:
|
||||||
|
- '600.0'
|
||||||
|
x-stainless-retry-count:
|
||||||
|
- '0'
|
||||||
|
x-stainless-runtime:
|
||||||
|
- CPython
|
||||||
|
x-stainless-runtime-version:
|
||||||
|
- 3.11.12
|
||||||
|
method: POST
|
||||||
|
uri: https://api.openai.com/v1/chat/completions
|
||||||
|
response:
|
||||||
|
body:
|
||||||
|
string: !!binary |
|
||||||
|
H4sIAAAAAAAAA4xU227bRhB911cM+CwJtnyN3xygBoI+xG1doGgTCKPdITnRcpbdGUoRAgP9jf5e
|
||||||
|
v6TYpWTJjR8KEATIs3PmdvZ8mwBU7Ks7qFyL5ro+zN5/tJ/urz7cPjziw68ff3y6vXz8vb1/fLyw
|
||||||
|
n29/q6Y5Iq6+kLND1NzFrg9kHGWEXSI0yqznN5dXi5vF9flZAbroKeSwprfZZZx1LDxbnC0uZ2c3
|
||||||
|
s/PbfXQb2ZFWd/DHBADgW3nnOsXT1+oOClf505EqNlTdvRwCqFIM+U+FqqyGYtX0CLooRlJK/wAS
|
||||||
|
t+BQoOENAUKTywYU3VIC+CQPLBjgvnzfwROpgaGuFTAReFJuhDxYBNpgGNAItCfHNTvQNYegEBOs
|
||||||
|
JW4D+YaABRBy/hRDIA8kG05ROhKbw1NL4BIbJUaoYzpwsjSgg3OkCiwuDJ7ABUxsO4g1sKilweXR
|
||||||
|
6xQwcFP4YMvWQiBMkgnGdfGG8hnxYC1BoA2FTEHSYEMlylraAQV2bFCn2EGPydhxj2I6h1/23WEI
|
||||||
|
uykg2GEioG0cgn8pEMHl3CWXi+JYCfoUuz6nQIM4WGAhhW3+YgX62pMz8mPWXN7rzPfec+5xzMx2
|
||||||
|
SNhiWdyWQph5qjnvQ11Muek0rBK7MSGqUn4K9Z8Dhv34EmkfJQMrVPIQBfpEnoxSV8gOK5nD+x2Q
|
||||||
|
6FCoC2emOk6AFRIF2qDY2HaLIZA0LE0peMshANX1uIewg45Qh0TfNfvPX3+Dwx5XHNiYtLD1KW7Y
|
||||||
|
ExRNrELWknLTWtaExczBCQbxlLLgfdl5Xah1KLuHDs0ozeGHQwm50R4T5rFOSyM5qiNro9/LpCby
|
||||||
|
K3Rr6Mi1KKzdKP08SjHGAGPuF4HGGlB2x6lMYdtyIMCgETpkMeQiSJRT9WeN+MHloiweVZtLaFLc
|
||||||
|
Wjs/vcGJ6kExu4gMIZwAKBKt9FO84/MeeX5xixCbPsWV/ie0qllY22Ui1CjZGdRiXxX0eQLwubjS
|
||||||
|
8MpoqlHOS4trKunOry5Gvupohqfo9R61aBiOwMXZu+kbhEtPhhz0xNgqh64lfww9uiAOnuMJMDlp
|
||||||
|
+/ty3uIeW2dp/g/9EXCOeiO/zHeG3euWj8cSfSmX++1jL2MuBVdKacOOlsaU8io81TiE0cIr3alR
|
||||||
|
t6xZGkp94tHH635Z3/ira7zFhasmz5N/AQAA//8DAAhvMU7VBgAA
|
||||||
|
headers:
|
||||||
|
CF-RAY:
|
||||||
|
- 93402233baf00110-GRU
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
Content-Encoding:
|
||||||
|
- gzip
|
||||||
|
Content-Type:
|
||||||
|
- application/json
|
||||||
|
Date:
|
||||||
|
- Mon, 21 Apr 2025 21:56:56 GMT
|
||||||
|
Server:
|
||||||
|
- cloudflare
|
||||||
|
Transfer-Encoding:
|
||||||
|
- chunked
|
||||||
|
X-Content-Type-Options:
|
||||||
|
- nosniff
|
||||||
|
access-control-expose-headers:
|
||||||
|
- X-Request-ID
|
||||||
|
alt-svc:
|
||||||
|
- h3=":443"; ma=86400
|
||||||
|
cf-cache-status:
|
||||||
|
- DYNAMIC
|
||||||
|
openai-organization:
|
||||||
|
- crewai-iuxna1
|
||||||
|
openai-processing-ms:
|
||||||
|
- '6058'
|
||||||
|
openai-version:
|
||||||
|
- '2020-10-01'
|
||||||
|
strict-transport-security:
|
||||||
|
- max-age=31536000; includeSubDomains; preload
|
||||||
|
x-ratelimit-limit-requests:
|
||||||
|
- '30000'
|
||||||
|
x-ratelimit-limit-tokens:
|
||||||
|
- '150000000'
|
||||||
|
x-ratelimit-remaining-requests:
|
||||||
|
- '29999'
|
||||||
|
x-ratelimit-remaining-tokens:
|
||||||
|
- '149999832'
|
||||||
|
x-ratelimit-reset-requests:
|
||||||
|
- 2ms
|
||||||
|
x-ratelimit-reset-tokens:
|
||||||
|
- 0s
|
||||||
|
x-request-id:
|
||||||
|
- req_f5273114a4a797fd0928674edb442194
|
||||||
|
status:
|
||||||
|
code: 200
|
||||||
|
message: OK
|
||||||
|
- request:
|
||||||
|
body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
|
||||||
|
personal goal is: Test Goal\nTo give my best complete final answer to the task
|
||||||
|
respond using the exact following format:\n\nThought: I now can give a great
|
||||||
|
answer\nFinal Answer: Your final answer must be the great and the most complete
|
||||||
|
as possible, it must be outcome described.\n\nI MUST use these formats, my job
|
||||||
|
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
|
||||||
|
is the expected criteria for your final answer: Output\nyou MUST return the
|
||||||
|
actual complete content as the final answer, not a summary.\n\nThis is the context
|
||||||
|
you''re working with:\n### Previous attempt failed validation: bad result\n\n\n###
|
||||||
|
Previous result:\nTest tasks are designed to evaluate specific skills or knowledge
|
||||||
|
in a controlled environment. The criteria for evaluating success include clarity
|
||||||
|
of instructions, alignment with learning objectives, and the level of engagement
|
||||||
|
they elicit from participants. Specifically, a test task should include a clear
|
||||||
|
and concise prompt that outlines what is expected from the participants. Additionally,
|
||||||
|
it should have a well-defined scoring rubric that assesses the quality of responses
|
||||||
|
based on predetermined criteria. By ensuring that the test task is relevant
|
||||||
|
and challenging, it will effectively measure the participants\u2019 capabilities
|
||||||
|
and provide valuable insights into their understanding of the subject matter.
|
||||||
|
Effective preparation, testing methods, and feedback mechanisms are essential
|
||||||
|
to the success of any test task, while also maintaining an environment conducive
|
||||||
|
to learning and growth.\n\n\nTry again, making sure to address the validation
|
||||||
|
error.\n\nBegin! This is VERY important to you, use the tools available and
|
||||||
|
give your best Final Answer, your job depends on it!\n\nThought:"}], "model":
|
||||||
|
"gpt-4o-mini", "stop": ["\nObservation:"]}'
|
||||||
|
headers:
|
||||||
|
accept:
|
||||||
|
- application/json
|
||||||
|
accept-encoding:
|
||||||
|
- gzip, deflate, zstd
|
||||||
|
connection:
|
||||||
|
- keep-alive
|
||||||
|
content-length:
|
||||||
|
- '1887'
|
||||||
|
content-type:
|
||||||
|
- application/json
|
||||||
|
cookie:
|
||||||
|
- __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
|
||||||
|
_cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
|
||||||
|
host:
|
||||||
|
- api.openai.com
|
||||||
|
user-agent:
|
||||||
|
- OpenAI/Python 1.68.2
|
||||||
|
x-stainless-arch:
|
||||||
|
- arm64
|
||||||
|
x-stainless-async:
|
||||||
|
- 'false'
|
||||||
|
x-stainless-lang:
|
||||||
|
- python
|
||||||
|
x-stainless-os:
|
||||||
|
- MacOS
|
||||||
|
x-stainless-package-version:
|
||||||
|
- 1.68.2
|
||||||
|
x-stainless-raw-response:
|
||||||
|
- 'true'
|
||||||
|
x-stainless-read-timeout:
|
||||||
|
- '600.0'
|
||||||
|
x-stainless-retry-count:
|
||||||
|
- '0'
|
||||||
|
x-stainless-runtime:
|
||||||
|
- CPython
|
||||||
|
x-stainless-runtime-version:
|
||||||
|
- 3.11.12
|
||||||
|
method: POST
|
||||||
|
uri: https://api.openai.com/v1/chat/completions
|
||||||
|
response:
|
||||||
|
body:
|
||||||
|
string: !!binary |
|
||||||
|
H4sIAAAAAAAAAwAAAP//jFfbbhtHEn3XVxT4kl2DJGTZlmy9yV4nCBCs5ayxXmAdCMXumpkOe7rG
|
||||||
|
fSFFBAbyG/t7+yWLqh7OUIoC7AsBTnfX5dSpU92/nQEsnF1cw8J0mE0/+NXbD/njD+fv/ee/X92/
|
||||||
|
+9fN839+/Pj57e35hx8ub9+8WSzlBG9+JZOPp9aG+8FTdhzqsomEmcTq86uXry6uLi6fX+pCz5a8
|
||||||
|
HGuHvHrJq94Ft7o4v3i5Or9aPX89nu7YGUqLa/j3GQDAb/orcQZL94trOF8ev/SUEra0uJ42ASwi
|
||||||
|
e/mywJRcyhjyYjkvGg6Zgob+IwTeg8EArdsRILQSNmBIe4oAX8L3LqCHG/1/DZ8oZciYtgkwEqQc
|
||||||
|
i8klkgVMiVLqKeQEaSDjGmfQ+wNYSq4NZCEz0A59wSx+BozZGTdgyN8lSFvnfVrCNvDek21pCRgs
|
||||||
|
CKaUKRhHCfYudy4AgoQf2Xuy0ETsac9xu4ZPHSUaY0sUdwR98dkNnmAoceBEaQkuGF+sCy3kjsBZ
|
||||||
|
ClkDlbIBN5IQhTZ3Sd3vCbeBkp6U/T1hKpEkSdnsCWMQW1yy4V52ySnZOUTeuTQaRSPmceMJGiK7
|
||||||
|
QbNdw5fwJXxiqCwBDEBNQyZLEfIR5CW4DC6BhBCyQy8YGg7JWYqQaEcRPWzpAOQ1qnQtZp+v4dmz
|
||||||
|
dxKdxvOOg3GJ4MdQy+U4pGfPrgUwcCffoC8pw0ariq7tcsNxj9GqEcJ0EO8lWIpCKLuG27mECVLH
|
||||||
|
xVstINA9muwPsO+wxn8/kMlkBYzcUb/UWkolJUjlSOOEIrWl3I60nIJiP+SjafVWvMAlEAtAoHzY
|
||||||
|
4wGyuCLvehcwk5TvANhvXFtcPiyBQiqxll327SgeOJDExkGtJewJBmxpLQheCII33rVBa63R/nSs
|
||||||
|
9odjkBOKU8GOoVoXSREwHCNpyGpDPM2smewApYwb71InlOZYKXSCrsDhEuAUkeZDqaYju+f2k6Qi
|
||||||
|
edphyFq6nlD8NcUvAb3nvTgXLxgAjSlRwntEbYVEexI4zl1Z/UnnO1EQW7t6Q1NjW4XvhcD3PrTY
|
||||||
|
qkGB6Qb25P1qFoPHkAXOwMEfxkxOfG5KBvSJgdTiU8CIfm0I0HSOdmK9i1zaTqOMWLvq2CJLiIR+
|
||||||
|
teconBoGP7b/2L07jI4sfC2UVBQajj3mBH9JxXSAaVKVVZXopSQQj4q5FLgGdWnQg6WepcOqg7+u
|
||||||
|
YQZFY6bQYTAEPWe3qyKkuuO8d6EV6RF8p4QzgZCHUvYHBfqlAP1ZgP3b2EH/MKw8/7lsojNKUJ6Z
|
||||||
|
5g+zBEuJvxb0Lh+k4pHSwEGlDiGNVqJaOZZoQ2BpR54HsiPwDzdwyd6FatpElyk6VKaNTh2HxxLs
|
||||||
|
xWCSAAaKirXgMQmGHkYFXr4cqSnEqRIxuZljzBFDGjDSyH/DfV+CVLnS9ZQ/KiB2p04zQ1uc1fCd
|
||||||
|
VJEGrJVTrF8J1j/XxjJUlbVD7ym09KdCsCEwGKkpMglNxCZPLTP1aOY/9Pt3ikB0FAzVUTQSVWZI
|
||||||
|
5lMGJ0MBo+O0hhtrnU4a7w86OsYghkipgiFmIg/RCQEUekHUHNMY1TEYLhFbSoquElkm71ZqJrEM
|
||||||
|
kTee+lViv5NvIm1cMvCO4r4j3x+L+6BTBcNLFYZp0N3OEKvh78fpKHDeRidCqPPOyoQabZ4MR7T0
|
||||||
|
Val8UqoT4HUGW/qTQdWhXHeMGXssUuISjdCf7qkfPMZREHgYOOYSXJZLiPCntjet4ZZTXtVYDB8n
|
||||||
|
6W6e8iLFJhYjk1uYfIIHtJH3MgODPZmpLrQnGi2jssbWeCV/OFLzpFXEgI4UaCL3Ovd6ufFtqUJ+
|
||||||
|
pZcBxVBCex92LnI4yvJPKFpSbzc0L0lb7DtnuoeQSz5o5b6aMsmlb8SyYfkvukEhkd6jsKF8GPGj
|
||||||
|
IEr2dBkaIi8d2nDMR3bTvTA2jckKt9ou1zYYR8A0THvU7vfLibVKUhg4OS3FNG3nhlJY3h5EiDgO
|
||||||
|
LLypYKfTIfEw8Xk6jQNzGlSUHtD8v7//BwwOuHG+EmaaXnprdZuSK4tqchxaFu9TmJKkDt6j1Eo4
|
||||||
|
VerGW6YWQTihev+AOyqjj4J5eKPFSJiUi66X/tBsl6p7eisocpmfoxnweLBOKvWht04PZEsdnOin
|
||||||
|
C/D69JUhspdQXjqheH+ygCFwriNR3je/jCvfpheN51Y0Jj06umiEed2d5MBBXi8p87DQ1W9nAL/o
|
||||||
|
y6k8eAwt6h3yLvOW1N2LizfV3mJ+sM2rr168GFczZ/TzwuvLi+UTBu8sZXQ+nTy+FgZNR3Y+Or/U
|
||||||
|
sFjHJwtnJ2n/MZynbNfUXWj/H/PzgjE0ZLJ3QyTrzMOU522RftWp+/S2CWYNeCFPLGfoLjuKUgpL
|
||||||
|
DRZfn5mLdEiZ+rvGhZbiEF19azbDXXNlX13ia7wwi7NvZ/8DAAD//wMA87a9+nkPAAA=
|
||||||
|
headers:
|
||||||
|
CF-RAY:
|
||||||
|
- 9340225b9bca0110-GRU
|
||||||
|
Connection:
|
||||||
|
- keep-alive
|
||||||
|
Content-Encoding:
|
||||||
|
- gzip
|
||||||
|
Content-Type:
|
||||||
|
- application/json
|
||||||
|
Date:
|
||||||
|
- Mon, 21 Apr 2025 21:57:05 GMT
|
||||||
|
Server:
|
||||||
|
- cloudflare
|
||||||
|
Transfer-Encoding:
|
||||||
|
- chunked
|
||||||
|
X-Content-Type-Options:
|
||||||
|
- nosniff
|
||||||
|
access-control-expose-headers:
|
||||||
|
- X-Request-ID
|
||||||
|
alt-svc:
|
||||||
|
- h3=":443"; ma=86400
|
||||||
|
cf-cache-status:
|
||||||
|
- DYNAMIC
|
||||||
|
openai-organization:
|
||||||
|
- crewai-iuxna1
|
||||||
|
openai-processing-ms:
|
||||||
|
- '9141'
|
||||||
|
openai-version:
|
||||||
|
- '2020-10-01'
|
||||||
|
strict-transport-security:
|
||||||
|
- max-age=31536000; includeSubDomains; preload
|
||||||
|
x-ratelimit-limit-requests:
|
||||||
|
- '30000'
|
||||||
|
x-ratelimit-limit-tokens:
|
||||||
|
- '150000000'
|
||||||
|
x-ratelimit-remaining-requests:
|
||||||
|
- '29999'
|
||||||
|
x-ratelimit-remaining-tokens:
|
||||||
|
- '149999564'
|
||||||
|
x-ratelimit-reset-requests:
|
||||||
|
- 2ms
|
||||||
|
x-ratelimit-reset-tokens:
|
||||||
|
- 0s
|
||||||
|
x-request-id:
|
||||||
|
- req_0fc29337116c1d19a0543dfe5b0db291
|
||||||
|
status:
|
||||||
|
code: 200
|
||||||
|
message: OK
|
||||||
|
version: 1
|
||||||
@@ -1,11 +1,16 @@
|
|||||||
"""Tests for task guardrails functionality."""
|
from unittest.mock import Mock, patch
|
||||||
|
|
||||||
from unittest.mock import Mock
|
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from crewai.task import Task
|
from crewai import Agent, Task
|
||||||
|
from crewai.llm import LLM
|
||||||
|
from crewai.tasks.guardrail_task import GuardrailTask
|
||||||
from crewai.tasks.task_output import TaskOutput
|
from crewai.tasks.task_output import TaskOutput
|
||||||
|
from crewai.utilities.events import (
|
||||||
|
GuardrailTaskCompletedEvent,
|
||||||
|
GuardrailTaskStartedEvent,
|
||||||
|
)
|
||||||
|
from crewai.utilities.events.crewai_event_bus import crewai_event_bus
|
||||||
|
|
||||||
|
|
||||||
def test_task_without_guardrail():
|
def test_task_without_guardrail():
|
||||||
@@ -22,7 +27,7 @@ def test_task_without_guardrail():
|
|||||||
assert result.raw == "test result"
|
assert result.raw == "test result"
|
||||||
|
|
||||||
|
|
||||||
def test_task_with_successful_guardrail():
|
def test_task_with_successful_guardrail_func():
|
||||||
"""Test that successful guardrail validation passes transformed result."""
|
"""Test that successful guardrail validation passes transformed result."""
|
||||||
|
|
||||||
def guardrail(result: TaskOutput):
|
def guardrail(result: TaskOutput):
|
||||||
@@ -127,3 +132,190 @@ def test_guardrail_error_in_context():
|
|||||||
|
|
||||||
assert "Task failed guardrail validation" in str(exc_info.value)
|
assert "Task failed guardrail validation" in str(exc_info.value)
|
||||||
assert "Expected JSON, got string" in str(exc_info.value)
|
assert "Expected JSON, got string" in str(exc_info.value)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def sample_agent():
|
||||||
|
return Agent(role="Test Agent", goal="Test Goal", backstory="Test Backstory")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||||
|
def test_guardrail_using_llm(sample_agent):
|
||||||
|
task = Task(
|
||||||
|
description="Test task",
|
||||||
|
expected_output="Output",
|
||||||
|
guardrail="Ensure the output is equal to 'good result'",
|
||||||
|
)
|
||||||
|
|
||||||
|
with patch(
|
||||||
|
"crewai.tasks.guardrail_task.GuardrailTask.__call__",
|
||||||
|
side_effect=[(False, "bad result"), (True, "good result")],
|
||||||
|
) as mock_guardrail:
|
||||||
|
task.execute_sync(agent=sample_agent)
|
||||||
|
|
||||||
|
assert mock_guardrail.call_count == 2
|
||||||
|
|
||||||
|
task.guardrail = GuardrailTask(
|
||||||
|
description="Ensure the output is equal to 'good result'",
|
||||||
|
llm=LLM(model="gpt-4o-mini"),
|
||||||
|
)
|
||||||
|
|
||||||
|
with patch(
|
||||||
|
"crewai.tasks.guardrail_task.GuardrailTask.__call__",
|
||||||
|
side_effect=[(False, "bad result"), (True, "good result")],
|
||||||
|
) as mock_guardrail:
|
||||||
|
task.execute_sync(agent=sample_agent)
|
||||||
|
|
||||||
|
assert mock_guardrail.call_count == 2
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def task_output():
|
||||||
|
return TaskOutput(
|
||||||
|
raw="Test output",
|
||||||
|
description="Test task",
|
||||||
|
expected_output="Output",
|
||||||
|
agent="Test Agent",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_guardrail_task_initialization_no_llm(task_output):
|
||||||
|
"""Test GuardrailTask initialization fails without LLM"""
|
||||||
|
with pytest.raises(ValueError, match="Provide a valid LLM to the GuardrailTask"):
|
||||||
|
GuardrailTask(description="Test")(task_output)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def mock_llm():
|
||||||
|
llm = Mock(spec=LLM)
|
||||||
|
llm.call.return_value = """
|
||||||
|
output = 'Sample book data'
|
||||||
|
if isinstance(output, str):
|
||||||
|
result = (True, output)
|
||||||
|
else:
|
||||||
|
result = (False, 'Invalid output format')
|
||||||
|
print(result)
|
||||||
|
"""
|
||||||
|
return llm
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"tool_run_output",
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"output": "(True, 'Valid output')",
|
||||||
|
"expected_result": True,
|
||||||
|
"expected_output": "Valid output",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"output": "(False, 'Invalid output format')",
|
||||||
|
"expected_result": False,
|
||||||
|
"expected_output": "Invalid output format",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"output": "Something went wrong while running the code, Invalid output format",
|
||||||
|
"expected_result": False,
|
||||||
|
"expected_output": "Something went wrong while running the code, Invalid output format",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"output": "No result variable found",
|
||||||
|
"expected_result": False,
|
||||||
|
"expected_output": "No result variable found",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"output": (False, "Invalid output format"),
|
||||||
|
"expected_result": False,
|
||||||
|
"expected_output": "Invalid output format",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
)
|
||||||
|
@patch("crewai_tools.CodeInterpreterTool.run")
|
||||||
|
def test_guardrail_task_execute_code(mock_run, mock_llm, tool_run_output, task_output):
|
||||||
|
mock_run.return_value = tool_run_output["output"]
|
||||||
|
|
||||||
|
guardrail = GuardrailTask(description="Test validation", llm=mock_llm)
|
||||||
|
|
||||||
|
result = guardrail(task_output)
|
||||||
|
assert result[0] == tool_run_output["expected_result"]
|
||||||
|
assert result[1] == tool_run_output["expected_output"]
|
||||||
|
|
||||||
|
|
||||||
|
@patch("crewai_tools.CodeInterpreterTool.run")
|
||||||
|
def test_guardrail_using_additional_instructions(mock_run, mock_llm, task_output):
|
||||||
|
mock_run.return_value = "(True, 'Valid output')"
|
||||||
|
additional_instructions = (
|
||||||
|
"This is an additional instruction created by the user follow it strictly"
|
||||||
|
)
|
||||||
|
guardrail = GuardrailTask(
|
||||||
|
description="Test validation",
|
||||||
|
llm=mock_llm,
|
||||||
|
additional_instructions=additional_instructions,
|
||||||
|
)
|
||||||
|
|
||||||
|
guardrail(task_output)
|
||||||
|
|
||||||
|
assert additional_instructions in str(mock_llm.call.call_args)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||||
|
def test_guardrail_emits_events(sample_agent):
|
||||||
|
started_guardrail = []
|
||||||
|
completed_guardrail = []
|
||||||
|
|
||||||
|
with crewai_event_bus.scoped_handlers():
|
||||||
|
|
||||||
|
@crewai_event_bus.on(GuardrailTaskStartedEvent)
|
||||||
|
def handle_guardrail_started(source, event):
|
||||||
|
started_guardrail.append(
|
||||||
|
{"guardrail": event.guardrail, "retry_count": event.retry_count}
|
||||||
|
)
|
||||||
|
|
||||||
|
@crewai_event_bus.on(GuardrailTaskCompletedEvent)
|
||||||
|
def handle_guardrail_completed(source, event):
|
||||||
|
completed_guardrail.append(
|
||||||
|
{
|
||||||
|
"success": event.success,
|
||||||
|
"result": event.result,
|
||||||
|
"error": event.error,
|
||||||
|
"retry_count": event.retry_count,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
task = Task(
|
||||||
|
description="Test task",
|
||||||
|
expected_output="Output",
|
||||||
|
guardrail="Ensure the output is equal to 'good result'",
|
||||||
|
)
|
||||||
|
|
||||||
|
with patch(
|
||||||
|
"crewai.tasks.guardrail_task.GuardrailTask.__call__",
|
||||||
|
side_effect=[(False, "bad result"), (True, "good result")],
|
||||||
|
):
|
||||||
|
task.execute_sync(agent=sample_agent)
|
||||||
|
|
||||||
|
expected_started_events = [
|
||||||
|
{
|
||||||
|
"guardrail": "Ensure the output is equal to 'good result'",
|
||||||
|
"retry_count": 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"guardrail": "Ensure the output is equal to 'good result'",
|
||||||
|
"retry_count": 1,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
expected_completed_events = [
|
||||||
|
{
|
||||||
|
"success": False,
|
||||||
|
"result": None,
|
||||||
|
"error": "bad result",
|
||||||
|
"retry_count": 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"success": True,
|
||||||
|
"result": "good result",
|
||||||
|
"error": None,
|
||||||
|
"retry_count": 1,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
assert started_guardrail == expected_started_events
|
||||||
|
assert completed_guardrail == expected_completed_events
|
||||||
|
|||||||
Reference in New Issue
Block a user