mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-09 16:18:30 +00:00
refactor: simplify TaskGuardrail to use LLM for validation, no code generation
This commit is contained in:
@@ -324,11 +324,7 @@ blog_task = Task(
|
||||
|
||||
### TaskGuardrail
|
||||
|
||||
The `TaskGuardrail` class provides a sophisticated way to generate and execute validation code for task outputs. Here's how it works:
|
||||
|
||||
#### Code Execution
|
||||
|
||||
The generated code can be executed in two ways: Docker container (Default, Recommended) or current environment (unsafe mode)
|
||||
The `TaskGuardrail` class offers a robust mechanism for validating task outputs
|
||||
|
||||
### Error Handling Best Practices
|
||||
|
||||
@@ -791,7 +787,7 @@ task = Task(
|
||||
)
|
||||
```
|
||||
|
||||
##### Using YAML
|
||||
#### Using YAML
|
||||
|
||||
```yaml
|
||||
research_task:
|
||||
|
||||
@@ -1,39 +1,40 @@
|
||||
from typing import Any, Tuple
|
||||
from typing import Any, Optional, Tuple
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from crewai.agent import Agent, LiteAgentOutput
|
||||
from crewai.llm import LLM
|
||||
from crewai.task import Task
|
||||
from crewai.tasks.task_output import TaskOutput
|
||||
from crewai.utilities.printer import Printer
|
||||
|
||||
|
||||
class TaskGuardrailResult(BaseModel):
|
||||
valid: bool = Field(
|
||||
description="Whether the task output complies with the guardrail"
|
||||
)
|
||||
feedback: str | None = Field(
|
||||
description="A feedback about the task output if it is not valid",
|
||||
default=None,
|
||||
)
|
||||
|
||||
|
||||
class TaskGuardrail:
|
||||
"""A task that validates the output of another task using generated Python code.
|
||||
|
||||
This class generates and executes Python code to validate task outputs based on
|
||||
specified criteria. It uses an LLM to generate the validation code and provides
|
||||
safety guardrails for code execution.
|
||||
The code is executed in a Docker container if available, otherwise it is executed in a sandboxed environment.
|
||||
If unsafe mode is enabled, the code is executed in the current environment.
|
||||
This class is used to validate the output from a Task based on specified criteria.
|
||||
It uses an LLM to validate the output and provides a feedback if the output is not valid.
|
||||
|
||||
Args:
|
||||
description (str): The description of the validation criteria.
|
||||
task (Task, optional): The task whose output needs validation.
|
||||
llm (LLM, optional): The language model to use for code generation.
|
||||
additional_instructions (str, optional): Additional instructions for the guardrail task.
|
||||
unsafe_mode (bool, optional): Whether to run the code in unsafe mode.
|
||||
Raises:
|
||||
ValueError: If no valid LLM is provided.
|
||||
"""
|
||||
|
||||
generated_code: str = ""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
description: str,
|
||||
task: Task | None = None,
|
||||
llm: LLM | None = None,
|
||||
additional_instructions: str = "",
|
||||
unsafe_mode: bool = False,
|
||||
):
|
||||
self.description = description
|
||||
|
||||
@@ -47,84 +48,36 @@ class TaskGuardrail:
|
||||
)
|
||||
self.llm: LLM | None = llm or fallback_llm
|
||||
|
||||
self.additional_instructions = additional_instructions
|
||||
self.unsafe_mode = unsafe_mode
|
||||
def _validate_output(self, task_output: TaskOutput) -> LiteAgentOutput:
|
||||
agent = Agent(
|
||||
role="Guardrail Agent",
|
||||
goal="Validate the output of the task",
|
||||
backstory="You are a expert at validating the output of a task. By providing effective feedback if the output is not valid.",
|
||||
llm=self.llm,
|
||||
)
|
||||
|
||||
@property
|
||||
def system_instructions(self) -> str:
|
||||
"""System instructions for the LLM code generation.
|
||||
query = f"""
|
||||
Ensure the following task result complies with the given guardrail.
|
||||
|
||||
Returns:
|
||||
str: Complete system instructions including security constraints.
|
||||
Task result:
|
||||
{task_output.raw}
|
||||
|
||||
Guardrail:
|
||||
{self.description}
|
||||
|
||||
Your task:
|
||||
- Confirm if the Task result complies with the guardrail.
|
||||
- If not, provide clear feedback explaining what is wrong (e.g., by how much it violates the rule, or what specific part fails).
|
||||
- Focus only on identifying issues — do not propose corrections.
|
||||
- If the Task result complies with the guardrail, saying that is valid
|
||||
"""
|
||||
security_instructions = (
|
||||
"- DO NOT wrap the output in markdown or use triple backticks. Return only raw Python code."
|
||||
"- DO NOT use `exec`, `eval`, `compile`, `open`, `os`, `subprocess`, `socket`, `shutil`, or any other system-level modules.\n"
|
||||
"- Your code must not perform any file I/O, shell access, or dynamic code execution."
|
||||
)
|
||||
return (
|
||||
"You are a expert Python developer"
|
||||
"You **must strictly** follow the task description, use the provided raw output as the input in your code. "
|
||||
"Your code must:\n"
|
||||
"- Return results with: print((True, data)) on success, or print((False, 'very detailed error message')) on failure. Make sure the final output is being assined to 'result' variable.\n"
|
||||
"- Use the literal string of the task output (already included in your input) if needed.\n"
|
||||
"- Generate the code **following strictly** the task description.\n"
|
||||
"- Be valid Python 3 — executable as-is.\n"
|
||||
f"{security_instructions}\n"
|
||||
"Additional instructions (do not override the previous instructions):\n"
|
||||
f"{self.additional_instructions}"
|
||||
)
|
||||
|
||||
def user_instructions(self, task_output: TaskOutput) -> str:
|
||||
"""Generates user instructions for the LLM code generation.
|
||||
result = agent.kickoff(query, response_format=TaskGuardrailResult)
|
||||
|
||||
Args:
|
||||
task_output (TaskOutput): The output to be validated.
|
||||
|
||||
Returns:
|
||||
str: Instructions for generating validation code.
|
||||
"""
|
||||
return (
|
||||
"Based on the task description below, generate Python 3 code that validates the task output. \n"
|
||||
"Task description:\n"
|
||||
f"{self.description}\n"
|
||||
"Here is the raw output from the task: \n"
|
||||
f"'{task_output.raw}' \n"
|
||||
"Use this exact string literal inside your generated code (do not reference variables like task_output.raw)."
|
||||
"Now generate Python code that follows the instructions above."
|
||||
)
|
||||
|
||||
def generate_code(self, task_output: TaskOutput) -> str:
|
||||
"""Generates Python code for validating the task output.
|
||||
|
||||
Args:
|
||||
task_output (TaskOutput): The output to be validated.
|
||||
"""
|
||||
if self.llm is None:
|
||||
raise ValueError("Provide a valid LLM to the TaskGuardrail")
|
||||
|
||||
response = self.llm.call(
|
||||
messages=[
|
||||
{
|
||||
"role": "system",
|
||||
"content": self.system_instructions,
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": self.user_instructions(task_output=task_output),
|
||||
},
|
||||
]
|
||||
)
|
||||
|
||||
printer = Printer()
|
||||
printer.print(
|
||||
content=f"The following code was generated for the guardrail task:\n{response}\n",
|
||||
color="cyan",
|
||||
)
|
||||
return response
|
||||
return result
|
||||
|
||||
def __call__(self, task_output: TaskOutput) -> Tuple[bool, Any]:
|
||||
"""Executes the validation code on the task output.
|
||||
"""Validates the output of a task based on specified criteria.
|
||||
|
||||
Args:
|
||||
task_output (TaskOutput): The output to be validated.
|
||||
@@ -134,28 +87,16 @@ class TaskGuardrail:
|
||||
- bool: True if validation passed, False otherwise
|
||||
- Any: The validation result or error message
|
||||
"""
|
||||
import ast
|
||||
|
||||
from crewai_tools import CodeInterpreterTool
|
||||
try:
|
||||
result = self._validate_output(task_output)
|
||||
assert isinstance(
|
||||
result.pydantic, TaskGuardrailResult
|
||||
), "The guardrail result is not a valid pydantic model"
|
||||
|
||||
self.generated_code = self.generate_code(task_output)
|
||||
|
||||
result = CodeInterpreterTool(
|
||||
code=self.generated_code, unsafe_mode=self.unsafe_mode
|
||||
).run()
|
||||
|
||||
error_messages = [
|
||||
"Something went wrong while running the code",
|
||||
"No result variable found", # when running in unsafe mode, the final output should be stored in the result variable
|
||||
]
|
||||
|
||||
if any(msg in result for msg in error_messages):
|
||||
return False, result
|
||||
|
||||
if isinstance(result, str):
|
||||
try:
|
||||
result = ast.literal_eval(result)
|
||||
except Exception as e:
|
||||
return False, f"Error parsing result: {str(e)}"
|
||||
|
||||
return result
|
||||
if result.pydantic.valid:
|
||||
return True, task_output.raw
|
||||
else:
|
||||
return False, result.pydantic.feedback
|
||||
except Exception as e:
|
||||
return False, f"Error while validating the task output: {str(e)}"
|
||||
|
||||
@@ -7,8 +7,8 @@ class TaskGuardrailStartedEvent(BaseEvent):
|
||||
"""Event emitted when a guardrail task starts
|
||||
|
||||
Attributes:
|
||||
messages: Content can be either a string or a list of dictionaries that support
|
||||
multimodal content (text, images, etc.)
|
||||
guardrail: The guardrail callable or TaskGuardrail instance
|
||||
retry_count: The number of times the guardrail has been retried
|
||||
"""
|
||||
|
||||
type: str = "task_guardrail_started"
|
||||
@@ -23,8 +23,7 @@ class TaskGuardrailStartedEvent(BaseEvent):
|
||||
super().__init__(**data)
|
||||
|
||||
if isinstance(self.guardrail, TaskGuardrail):
|
||||
assert self.guardrail.generated_code is not None
|
||||
self.guardrail = self.guardrail.generated_code.strip()
|
||||
self.guardrail = self.guardrail.description.strip()
|
||||
elif isinstance(self.guardrail, Callable):
|
||||
self.guardrail = getsource(self.guardrail).strip()
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,522 +0,0 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
|
||||
personal goal is: Test Goal\nTo give my best complete final answer to the task
|
||||
respond using the exact following format:\n\nThought: I now can give a great
|
||||
answer\nFinal Answer: Your final answer must be the great and the most complete
|
||||
as possible, it must be outcome described.\n\nI MUST use these formats, my job
|
||||
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
|
||||
is the expected criteria for your final answer: Output\nyou MUST return the
|
||||
actual complete content as the final answer, not a summary.\n\nBegin! This is
|
||||
VERY important to you, use the tools available and give your best Final Answer,
|
||||
your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"]}'
|
||||
headers:
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- gzip, deflate, zstd
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '807'
|
||||
content-type:
|
||||
- application/json
|
||||
host:
|
||||
- api.openai.com
|
||||
user-agent:
|
||||
- OpenAI/Python 1.68.2
|
||||
x-stainless-arch:
|
||||
- arm64
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- MacOS
|
||||
x-stainless-package-version:
|
||||
- 1.68.2
|
||||
x-stainless-raw-response:
|
||||
- 'true'
|
||||
x-stainless-read-timeout:
|
||||
- '600.0'
|
||||
x-stainless-retry-count:
|
||||
- '0'
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.11.12
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
body:
|
||||
string: !!binary |
|
||||
H4sIAAAAAAAAAwAAAP//jFTBbuQ2DL3PVxA6e4KJm+zszq0ttkBORYuiPbSLASPRNndlSRHpmcwu
|
||||
8u+FZCcz2ebQi2H78ZGPj6K+rQAMO7MDYwdUOya//ulX/a3tf/+Z3f3jX/xxdGGDfz60p7uvx+mj
|
||||
aQoj3n8mq8+sKxvH5Ek5hhm2mVCpZL3e3ty22/bdpq3AGB35QuuTrm/ieuTA63bT3qw32/X1+4U9
|
||||
RLYkZgd/rwAAvtVn0RkcPZodbJrnPyOJYE9m9xIEYHL05Y9BERbFoKY5gzYGpVCl30GIR7AYoOcD
|
||||
AUJfZAMGOVIG+Cf8wgE9/Fi/d/DHQKAoXwAVBgwOMj1MnEkAoRiQaaAgNVNw4EiRPZUoSTEIgQ6o
|
||||
MBKpgA4E9JjIKjmwmZUyI3QxQ5w0TXoFdwG6yXfsPYcedGB5LjdS0AZYgQVIhIIyetAINh4oA3oP
|
||||
mTwdMCiglBpSBaUcD+wIllmVl+pEAxRkynMZ1KqNQxfziGWgpYz1hLkpBMtCTU2H1k4ZlfwJ0Llc
|
||||
lMx9BSInELv6Ufy6Ks5l6mKmBu7gyN6XNj0HWgi2TDGfqrQYKKg0gK6QSl8YTtBP7Kgw5LkPN8uo
|
||||
2umsvKsjm10s0nWIOU79UIOP5P06ZSqmlQRH1iFOCnFk1WIAjynmcmKW8QnEDDKNI2b+Ojv0ypzS
|
||||
GgtgSjmiHebe+gkzBqVyogbuh/XDhJ719N1JEFSWjhfXFkPccixqdrm6PLiZukmwLE+YvL8AMIS4
|
||||
EMrKfFqQp5cl8bFPOd7Ld1TTcWAZ9plQYigLIRqTqejTCuBTXcbp1X6ZlOOYdK/xC9Vy17c/zPnM
|
||||
+Q64QK8/LKhGRX8G2m3bvJFwv9h+sc/Goh3Inann5cfJcbwAVhdt/1fOW7nn1jn0/yf9GbCWkpLb
|
||||
p0yO7euWz2GZPtcFfzvsxeYq2AjlA1vaK1Muo3DU4eTnm8vISZTGfcehp5wyz9dXl/bd1t2+w/fY
|
||||
WrN6Wv0LAAD//wMAAfXtOswFAAA=
|
||||
headers:
|
||||
CF-RAY:
|
||||
- 934022059c2c0110-GRU
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Encoding:
|
||||
- gzip
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Mon, 21 Apr 2025 21:56:45 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Set-Cookie:
|
||||
- __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
|
||||
path=/; expires=Mon, 21-Apr-25 22:26:45 GMT; domain=.api.openai.com; HttpOnly;
|
||||
Secure; SameSite=None
|
||||
- _cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000;
|
||||
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- nosniff
|
||||
access-control-expose-headers:
|
||||
- X-Request-ID
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
cf-cache-status:
|
||||
- DYNAMIC
|
||||
openai-organization:
|
||||
- crewai-iuxna1
|
||||
openai-processing-ms:
|
||||
- '2377'
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
strict-transport-security:
|
||||
- max-age=31536000; includeSubDomains; preload
|
||||
x-ratelimit-limit-requests:
|
||||
- '30000'
|
||||
x-ratelimit-limit-tokens:
|
||||
- '150000000'
|
||||
x-ratelimit-remaining-requests:
|
||||
- '29999'
|
||||
x-ratelimit-remaining-tokens:
|
||||
- '149999832'
|
||||
x-ratelimit-reset-requests:
|
||||
- 2ms
|
||||
x-ratelimit-reset-tokens:
|
||||
- 0s
|
||||
x-request-id:
|
||||
- req_f39581c88a83855cf77c06098b787948
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
- request:
|
||||
body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
|
||||
personal goal is: Test Goal\nTo give my best complete final answer to the task
|
||||
respond using the exact following format:\n\nThought: I now can give a great
|
||||
answer\nFinal Answer: Your final answer must be the great and the most complete
|
||||
as possible, it must be outcome described.\n\nI MUST use these formats, my job
|
||||
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
|
||||
is the expected criteria for your final answer: Output\nyou MUST return the
|
||||
actual complete content as the final answer, not a summary.\n\nThis is the context
|
||||
you''re working with:\n### Previous attempt failed validation: bad result\n\n\n###
|
||||
Previous result:\nThe task at hand requires a comprehensive and detailed response
|
||||
that meets the expected criteria for output. In fulfilling this requirement,
|
||||
it is essential to cover all relevant aspects and provide complete content,
|
||||
ensuring that the information is clear, concise, and accurately addresses the
|
||||
needs of the task. Therefore, I will outline the necessary components, adhere
|
||||
to any guidelines provided, and ensure that the final output is thorough and
|
||||
well-presented, without omitting important details or summarizing the information.
|
||||
This approach will guarantee a high-quality response that satisfies the outlined
|
||||
expectations.\n\n\nTry again, making sure to address the validation error.\n\nBegin!
|
||||
This is VERY important to you, use the tools available and give your best Final
|
||||
Answer, your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop":
|
||||
["\nObservation:"]}'
|
||||
headers:
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- gzip, deflate, zstd
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '1619'
|
||||
content-type:
|
||||
- application/json
|
||||
cookie:
|
||||
- __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
|
||||
_cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
|
||||
host:
|
||||
- api.openai.com
|
||||
user-agent:
|
||||
- OpenAI/Python 1.68.2
|
||||
x-stainless-arch:
|
||||
- arm64
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- MacOS
|
||||
x-stainless-package-version:
|
||||
- 1.68.2
|
||||
x-stainless-raw-response:
|
||||
- 'true'
|
||||
x-stainless-read-timeout:
|
||||
- '600.0'
|
||||
x-stainless-retry-count:
|
||||
- '0'
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.11.12
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
body:
|
||||
string: !!binary |
|
||||
H4sIAAAAAAAAA4xWTW8cNwy9+1cQczTWi83GjhPf3CQFjKJI27hoizowuBJnhrZGUkVq15sg/72Q
|
||||
ZvbDaQ69GOshRb1HPpL6cgLQsG2uoDE9qhmiO/vhg/56YR5uPy4uzfvFm5//ePfnb8vPf70y4Z+f
|
||||
PjSzciKsHsjo7tTchCE6Ug5+NJtEqFSivrg8v1heLl8tLqphCJZcOdZFPTsPZwN7Plsuludni8uz
|
||||
F6+n031gQ9Jcwd8nAABf6t+C01t6aq5gMdt9GUgEO2qu9k4ATQqufGlQhEXRazM7GE3wSr5CvwEf
|
||||
NmDQQ8drAoSuwAb0sqEEcOd/ZI8Oruv/V3DbEyjKI6BCj96CJ0MirKgkgKB9SCF3PRSbJUV2ZCGR
|
||||
xOCFQAOQl5wIarYYvSHYsPagPQE9RTJKFkxipcQIbUgQssas83p1TDxg2sKY+gKYpQSNKazZEvig
|
||||
8JBFAUHyUF1XWWvwqTzlRyUPiVypTzmuE6sZoLWp8PEdoHPFh9boFVAKNKm00PaUiocGQL/dXW6h
|
||||
y2zJsSeZw52/87cB0PRMawLtWWb1njY4FzbleEEUPHkV2LBzsCJgb1wuodhX513irmq8F3M4Pb3x
|
||||
moLNpgjt9PQK3jrC5LZgqWVP9VTMKQYhCO2eWQUesrqdj3DnuWVTKxBaKMDXIy2syBL15KVqotZ+
|
||||
fueh6GFZQLzbVfbao9sKS0Hy/gmHEp7Q9FPC9hBCZLMXh9vOCtOQYkio5coWjcoMLCrOKtR94g0K
|
||||
gWi2TAIhAT1hqWOtuuQYQ9JaKSkKHEoy5/C+AvCdIxiKGFZVWi6ksdo7sSC02TnI3lIqLWILkgmw
|
||||
5KowGFC1cr/zLwvxtw4T67Zi/CWRYZnK8LsQmFKKaorVRODQdxm7kvLKPExiPG4IE/yatsC+DWnA
|
||||
Ulagth3l7bZzuF4HtoDDiru8u3pqIu1Rx3THwF5LM2BSNnlUNntA2OB29GMBQtmOV071tZXZeWUW
|
||||
ivZ2dD7W7uHPo1geaQst1wSNLcBeuOtVwCbceGhTGKrjvuPHfI9sjjUwg0SVqakN1BPwUGq4k2H9
|
||||
cpSJmEjIFzL1WpWdMgwdN26lcVFoXFvL5SQ6+I0k5GSoivOmBYzRscGVo9mu0SBRS4m8GcXl2D9W
|
||||
ZbU5aU8JEmFVxYB1HjkZU1nG5V5FhwvrZHnSEmlKELCfUI5CErAsJovQmPnbURZ1PIBoykZzUSnG
|
||||
mAKafgY342zoMib0SkW1B/EULAORSr1iam87zdGaQIFNz44mtGN3Jy6Bd6Ua+60PjkXZwJpp83xw
|
||||
jIN+Djc6QimNXSeHc1vAKs6YaM0hC0TWFp0TkFxnAKyxy0dtMGrn21FcB2Oi1XbU9YhSUFnKXAip
|
||||
ig/dbhMcb7JEbRYs29Rn544M6H2YUlB26KfJ8nW/NV3oYgor+eZo07Jn6e8ToQRfNqRoiE21fj0B
|
||||
+FS3c362cJuYwhD1XsMj1euWb5ZjvObwKDhYXy5eT1YNiu5geLWYlvrzgPdjqeRowTcGTU/2cPTw
|
||||
GsBsORwZTo5o/xfO92KP1Nl3/yf8wWAMRSV7HxNZNs8pH9wSPdQd/323fZor4EYordnQvTKlUgpL
|
||||
LWY3PmUa2YrScN+y7yjFxON7po337aW9eIWvcWmak68n/wIAAP//AwA0/RJL3QkAAA==
|
||||
headers:
|
||||
CF-RAY:
|
||||
- 93402216690b0110-GRU
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Encoding:
|
||||
- gzip
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Mon, 21 Apr 2025 21:56:49 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- nosniff
|
||||
access-control-expose-headers:
|
||||
- X-Request-ID
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
cf-cache-status:
|
||||
- DYNAMIC
|
||||
openai-organization:
|
||||
- crewai-iuxna1
|
||||
openai-processing-ms:
|
||||
- '4451'
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
strict-transport-security:
|
||||
- max-age=31536000; includeSubDomains; preload
|
||||
x-ratelimit-limit-requests:
|
||||
- '30000'
|
||||
x-ratelimit-limit-tokens:
|
||||
- '150000000'
|
||||
x-ratelimit-remaining-requests:
|
||||
- '29999'
|
||||
x-ratelimit-remaining-tokens:
|
||||
- '149999631'
|
||||
x-ratelimit-reset-requests:
|
||||
- 2ms
|
||||
x-ratelimit-reset-tokens:
|
||||
- 0s
|
||||
x-request-id:
|
||||
- req_8383a16d5f5b7f53d659bebf481ba936
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
- request:
|
||||
body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
|
||||
personal goal is: Test Goal\nTo give my best complete final answer to the task
|
||||
respond using the exact following format:\n\nThought: I now can give a great
|
||||
answer\nFinal Answer: Your final answer must be the great and the most complete
|
||||
as possible, it must be outcome described.\n\nI MUST use these formats, my job
|
||||
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
|
||||
is the expected criteria for your final answer: Output\nyou MUST return the
|
||||
actual complete content as the final answer, not a summary.\n\nBegin! This is
|
||||
VERY important to you, use the tools available and give your best Final Answer,
|
||||
your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"]}'
|
||||
headers:
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- gzip, deflate, zstd
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '807'
|
||||
content-type:
|
||||
- application/json
|
||||
cookie:
|
||||
- __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
|
||||
_cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
|
||||
host:
|
||||
- api.openai.com
|
||||
user-agent:
|
||||
- OpenAI/Python 1.68.2
|
||||
x-stainless-arch:
|
||||
- arm64
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- MacOS
|
||||
x-stainless-package-version:
|
||||
- 1.68.2
|
||||
x-stainless-raw-response:
|
||||
- 'true'
|
||||
x-stainless-read-timeout:
|
||||
- '600.0'
|
||||
x-stainless-retry-count:
|
||||
- '0'
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.11.12
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
body:
|
||||
string: !!binary |
|
||||
H4sIAAAAAAAAA4xU227bRhB911cM+CwJtnyN3xygBoI+xG1doGgTCKPdITnRcpbdGUoRAgP9jf5e
|
||||
v6TYpWTJjR8KEATIs3PmdvZ8mwBU7Ks7qFyL5ro+zN5/tJ/urz7cPjziw68ff3y6vXz8vb1/fLyw
|
||||
n29/q6Y5Iq6+kLND1NzFrg9kHGWEXSI0yqznN5dXi5vF9flZAbroKeSwprfZZZx1LDxbnC0uZ2c3
|
||||
s/PbfXQb2ZFWd/DHBADgW3nnOsXT1+oOClf505EqNlTdvRwCqFIM+U+FqqyGYtX0CLooRlJK/wAS
|
||||
t+BQoOENAUKTywYU3VIC+CQPLBjgvnzfwROpgaGuFTAReFJuhDxYBNpgGNAItCfHNTvQNYegEBOs
|
||||
JW4D+YaABRBy/hRDIA8kG05ROhKbw1NL4BIbJUaoYzpwsjSgg3OkCiwuDJ7ABUxsO4g1sKilweXR
|
||||
6xQwcFP4YMvWQiBMkgnGdfGG8hnxYC1BoA2FTEHSYEMlylraAQV2bFCn2EGPydhxj2I6h1/23WEI
|
||||
uykg2GEioG0cgn8pEMHl3CWXi+JYCfoUuz6nQIM4WGAhhW3+YgX62pMz8mPWXN7rzPfec+5xzMx2
|
||||
SNhiWdyWQph5qjnvQ11Muek0rBK7MSGqUn4K9Z8Dhv34EmkfJQMrVPIQBfpEnoxSV8gOK5nD+x2Q
|
||||
6FCoC2emOk6AFRIF2qDY2HaLIZA0LE0peMshANX1uIewg45Qh0TfNfvPX3+Dwx5XHNiYtLD1KW7Y
|
||||
ExRNrELWknLTWtaExczBCQbxlLLgfdl5Xah1KLuHDs0ozeGHQwm50R4T5rFOSyM5qiNro9/LpCby
|
||||
K3Rr6Mi1KKzdKP08SjHGAGPuF4HGGlB2x6lMYdtyIMCgETpkMeQiSJRT9WeN+MHloiweVZtLaFLc
|
||||
Wjs/vcGJ6kExu4gMIZwAKBKt9FO84/MeeX5xixCbPsWV/ie0qllY22Ui1CjZGdRiXxX0eQLwubjS
|
||||
8MpoqlHOS4trKunOry5Gvupohqfo9R61aBiOwMXZu+kbhEtPhhz0xNgqh64lfww9uiAOnuMJMDlp
|
||||
+/ty3uIeW2dp/g/9EXCOeiO/zHeG3euWj8cSfSmX++1jL2MuBVdKacOOlsaU8io81TiE0cIr3alR
|
||||
t6xZGkp94tHH635Z3/ira7zFhasmz5N/AQAA//8DAAhvMU7VBgAA
|
||||
headers:
|
||||
CF-RAY:
|
||||
- 93402233baf00110-GRU
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Encoding:
|
||||
- gzip
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Mon, 21 Apr 2025 21:56:56 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- nosniff
|
||||
access-control-expose-headers:
|
||||
- X-Request-ID
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
cf-cache-status:
|
||||
- DYNAMIC
|
||||
openai-organization:
|
||||
- crewai-iuxna1
|
||||
openai-processing-ms:
|
||||
- '6058'
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
strict-transport-security:
|
||||
- max-age=31536000; includeSubDomains; preload
|
||||
x-ratelimit-limit-requests:
|
||||
- '30000'
|
||||
x-ratelimit-limit-tokens:
|
||||
- '150000000'
|
||||
x-ratelimit-remaining-requests:
|
||||
- '29999'
|
||||
x-ratelimit-remaining-tokens:
|
||||
- '149999832'
|
||||
x-ratelimit-reset-requests:
|
||||
- 2ms
|
||||
x-ratelimit-reset-tokens:
|
||||
- 0s
|
||||
x-request-id:
|
||||
- req_f5273114a4a797fd0928674edb442194
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
- request:
|
||||
body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
|
||||
personal goal is: Test Goal\nTo give my best complete final answer to the task
|
||||
respond using the exact following format:\n\nThought: I now can give a great
|
||||
answer\nFinal Answer: Your final answer must be the great and the most complete
|
||||
as possible, it must be outcome described.\n\nI MUST use these formats, my job
|
||||
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
|
||||
is the expected criteria for your final answer: Output\nyou MUST return the
|
||||
actual complete content as the final answer, not a summary.\n\nThis is the context
|
||||
you''re working with:\n### Previous attempt failed validation: bad result\n\n\n###
|
||||
Previous result:\nTest tasks are designed to evaluate specific skills or knowledge
|
||||
in a controlled environment. The criteria for evaluating success include clarity
|
||||
of instructions, alignment with learning objectives, and the level of engagement
|
||||
they elicit from participants. Specifically, a test task should include a clear
|
||||
and concise prompt that outlines what is expected from the participants. Additionally,
|
||||
it should have a well-defined scoring rubric that assesses the quality of responses
|
||||
based on predetermined criteria. By ensuring that the test task is relevant
|
||||
and challenging, it will effectively measure the participants\u2019 capabilities
|
||||
and provide valuable insights into their understanding of the subject matter.
|
||||
Effective preparation, testing methods, and feedback mechanisms are essential
|
||||
to the success of any test task, while also maintaining an environment conducive
|
||||
to learning and growth.\n\n\nTry again, making sure to address the validation
|
||||
error.\n\nBegin! This is VERY important to you, use the tools available and
|
||||
give your best Final Answer, your job depends on it!\n\nThought:"}], "model":
|
||||
"gpt-4o-mini", "stop": ["\nObservation:"]}'
|
||||
headers:
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- gzip, deflate, zstd
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '1887'
|
||||
content-type:
|
||||
- application/json
|
||||
cookie:
|
||||
- __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
|
||||
_cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
|
||||
host:
|
||||
- api.openai.com
|
||||
user-agent:
|
||||
- OpenAI/Python 1.68.2
|
||||
x-stainless-arch:
|
||||
- arm64
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- MacOS
|
||||
x-stainless-package-version:
|
||||
- 1.68.2
|
||||
x-stainless-raw-response:
|
||||
- 'true'
|
||||
x-stainless-read-timeout:
|
||||
- '600.0'
|
||||
x-stainless-retry-count:
|
||||
- '0'
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.11.12
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
body:
|
||||
string: !!binary |
|
||||
H4sIAAAAAAAAAwAAAP//jFfbbhtHEn3XVxT4kl2DJGTZlmy9yV4nCBCs5ayxXmAdCMXumpkOe7rG
|
||||
fSFFBAbyG/t7+yWLqh7OUIoC7AsBTnfX5dSpU92/nQEsnF1cw8J0mE0/+NXbD/njD+fv/ee/X92/
|
||||
+9fN839+/Pj57e35hx8ub9+8WSzlBG9+JZOPp9aG+8FTdhzqsomEmcTq86uXry6uLi6fX+pCz5a8
|
||||
HGuHvHrJq94Ft7o4v3i5Or9aPX89nu7YGUqLa/j3GQDAb/orcQZL94trOF8ev/SUEra0uJ42ASwi
|
||||
e/mywJRcyhjyYjkvGg6Zgob+IwTeg8EArdsRILQSNmBIe4oAX8L3LqCHG/1/DZ8oZciYtgkwEqQc
|
||||
i8klkgVMiVLqKeQEaSDjGmfQ+wNYSq4NZCEz0A59wSx+BozZGTdgyN8lSFvnfVrCNvDek21pCRgs
|
||||
CKaUKRhHCfYudy4AgoQf2Xuy0ETsac9xu4ZPHSUaY0sUdwR98dkNnmAoceBEaQkuGF+sCy3kjsBZ
|
||||
ClkDlbIBN5IQhTZ3Sd3vCbeBkp6U/T1hKpEkSdnsCWMQW1yy4V52ySnZOUTeuTQaRSPmceMJGiK7
|
||||
QbNdw5fwJXxiqCwBDEBNQyZLEfIR5CW4DC6BhBCyQy8YGg7JWYqQaEcRPWzpAOQ1qnQtZp+v4dmz
|
||||
dxKdxvOOg3GJ4MdQy+U4pGfPrgUwcCffoC8pw0ariq7tcsNxj9GqEcJ0EO8lWIpCKLuG27mECVLH
|
||||
xVstINA9muwPsO+wxn8/kMlkBYzcUb/UWkolJUjlSOOEIrWl3I60nIJiP+SjafVWvMAlEAtAoHzY
|
||||
4wGyuCLvehcwk5TvANhvXFtcPiyBQiqxll327SgeOJDExkGtJewJBmxpLQheCII33rVBa63R/nSs
|
||||
9odjkBOKU8GOoVoXSREwHCNpyGpDPM2smewApYwb71InlOZYKXSCrsDhEuAUkeZDqaYju+f2k6Qi
|
||||
edphyFq6nlD8NcUvAb3nvTgXLxgAjSlRwntEbYVEexI4zl1Z/UnnO1EQW7t6Q1NjW4XvhcD3PrTY
|
||||
qkGB6Qb25P1qFoPHkAXOwMEfxkxOfG5KBvSJgdTiU8CIfm0I0HSOdmK9i1zaTqOMWLvq2CJLiIR+
|
||||
teconBoGP7b/2L07jI4sfC2UVBQajj3mBH9JxXSAaVKVVZXopSQQj4q5FLgGdWnQg6WepcOqg7+u
|
||||
YQZFY6bQYTAEPWe3qyKkuuO8d6EV6RF8p4QzgZCHUvYHBfqlAP1ZgP3b2EH/MKw8/7lsojNKUJ6Z
|
||||
5g+zBEuJvxb0Lh+k4pHSwEGlDiGNVqJaOZZoQ2BpR54HsiPwDzdwyd6FatpElyk6VKaNTh2HxxLs
|
||||
xWCSAAaKirXgMQmGHkYFXr4cqSnEqRIxuZljzBFDGjDSyH/DfV+CVLnS9ZQ/KiB2p04zQ1uc1fCd
|
||||
VJEGrJVTrF8J1j/XxjJUlbVD7ym09KdCsCEwGKkpMglNxCZPLTP1aOY/9Pt3ikB0FAzVUTQSVWZI
|
||||
5lMGJ0MBo+O0hhtrnU4a7w86OsYghkipgiFmIg/RCQEUekHUHNMY1TEYLhFbSoquElkm71ZqJrEM
|
||||
kTee+lViv5NvIm1cMvCO4r4j3x+L+6BTBcNLFYZp0N3OEKvh78fpKHDeRidCqPPOyoQabZ4MR7T0
|
||||
Val8UqoT4HUGW/qTQdWhXHeMGXssUuISjdCf7qkfPMZREHgYOOYSXJZLiPCntjet4ZZTXtVYDB8n
|
||||
6W6e8iLFJhYjk1uYfIIHtJH3MgODPZmpLrQnGi2jssbWeCV/OFLzpFXEgI4UaCL3Ovd6ufFtqUJ+
|
||||
pZcBxVBCex92LnI4yvJPKFpSbzc0L0lb7DtnuoeQSz5o5b6aMsmlb8SyYfkvukEhkd6jsKF8GPGj
|
||||
IEr2dBkaIi8d2nDMR3bTvTA2jckKt9ou1zYYR8A0THvU7vfLibVKUhg4OS3FNG3nhlJY3h5EiDgO
|
||||
LLypYKfTIfEw8Xk6jQNzGlSUHtD8v7//BwwOuHG+EmaaXnprdZuSK4tqchxaFu9TmJKkDt6j1Eo4
|
||||
VerGW6YWQTihev+AOyqjj4J5eKPFSJiUi66X/tBsl6p7eisocpmfoxnweLBOKvWht04PZEsdnOin
|
||||
C/D69JUhspdQXjqheH+ygCFwriNR3je/jCvfpheN51Y0Jj06umiEed2d5MBBXi8p87DQ1W9nAL/o
|
||||
y6k8eAwt6h3yLvOW1N2LizfV3mJ+sM2rr168GFczZ/TzwuvLi+UTBu8sZXQ+nTy+FgZNR3Y+Or/U
|
||||
sFjHJwtnJ2n/MZynbNfUXWj/H/PzgjE0ZLJ3QyTrzMOU522RftWp+/S2CWYNeCFPLGfoLjuKUgpL
|
||||
DRZfn5mLdEiZ+rvGhZbiEF19azbDXXNlX13ia7wwi7NvZ/8DAAD//wMA87a9+nkPAAA=
|
||||
headers:
|
||||
CF-RAY:
|
||||
- 9340225b9bca0110-GRU
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Encoding:
|
||||
- gzip
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Mon, 21 Apr 2025 21:57:05 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- nosniff
|
||||
access-control-expose-headers:
|
||||
- X-Request-ID
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
cf-cache-status:
|
||||
- DYNAMIC
|
||||
openai-organization:
|
||||
- crewai-iuxna1
|
||||
openai-processing-ms:
|
||||
- '9141'
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
strict-transport-security:
|
||||
- max-age=31536000; includeSubDomains; preload
|
||||
x-ratelimit-limit-requests:
|
||||
- '30000'
|
||||
x-ratelimit-limit-tokens:
|
||||
- '150000000'
|
||||
x-ratelimit-remaining-requests:
|
||||
- '29999'
|
||||
x-ratelimit-remaining-tokens:
|
||||
- '149999564'
|
||||
x-ratelimit-reset-requests:
|
||||
- 2ms
|
||||
x-ratelimit-reset-tokens:
|
||||
- 0s
|
||||
x-request-id:
|
||||
- req_0fc29337116c1d19a0543dfe5b0db291
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
version: 1
|
||||
288
tests/cassettes/test_guardrail_when_an_error_occurs.yaml
Normal file
288
tests/cassettes/test_guardrail_when_an_error_occurs.yaml
Normal file
@@ -0,0 +1,288 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
|
||||
personal goal is: Test Goal\nTo give my best complete final answer to the task
|
||||
respond using the exact following format:\n\nThought: I now can give a great
|
||||
answer\nFinal Answer: Your final answer must be the great and the most complete
|
||||
as possible, it must be outcome described.\n\nI MUST use these formats, my job
|
||||
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Gather information
|
||||
about available books on the First World War\n\nThis is the expected criteria
|
||||
for your final answer: A list of available books on the First World War\nyou
|
||||
MUST return the actual complete content as the final answer, not a summary.\n\nBegin!
|
||||
This is VERY important to you, use the tools available and give your best Final
|
||||
Answer, your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop":
|
||||
["\nObservation:"]}'
|
||||
headers:
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- gzip, deflate
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '903'
|
||||
content-type:
|
||||
- application/json
|
||||
host:
|
||||
- api.openai.com
|
||||
user-agent:
|
||||
- OpenAI/Python 1.68.2
|
||||
x-stainless-arch:
|
||||
- arm64
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- MacOS
|
||||
x-stainless-package-version:
|
||||
- 1.68.2
|
||||
x-stainless-raw-response:
|
||||
- 'true'
|
||||
x-stainless-read-timeout:
|
||||
- '600.0'
|
||||
x-stainless-retry-count:
|
||||
- '0'
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.12.9
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
body:
|
||||
string: !!binary |
|
||||
H4sIAAAAAAAAAwAAAP//jFdhb9w2Ev3uXzHQlwCBbewmThzvN1+Q2G3PQM+X1mjPhTFLjqSpKY46
|
||||
pHazV+S/H0hK2t00Be6LYZEi9d68x8fZP08AKrbVCirTYjRd787+cU/3P7+9tfTPq+1PTffDtr7+
|
||||
tf759+HG9tdSnaYVsv6dTJxWnRvpekeRxZdpo4SR0q7Ly4s3V68Xi+VlnujEkkvLmj6eXchZx57P
|
||||
Xi1eXZwtLs+W78bVrbChUK3gPycAAH/mvwmnt/S5WsHidBrpKARsqFrNLwFUKi6NVBgCh4g+Vqf7
|
||||
SSM+ks/QvwMvWzDooeENAUKTYAP6sCUFePQf2aOD6/y8gltSAg6AkOgqteRDWuc4RJAacIPscO0I
|
||||
1iLPAcRDbAk+soYID6LOwgPq6tE/+uU5vHz5WH1qCf7tiPotumfSsIJb2cKHQaUneCAfIUpaA+xh
|
||||
ebW8eKxgvYP3rXKI0rek8N6hPr98mcACwKeWQ/44WHIbCsA+SgZRBPoMNZooGiC2GMGRhXFehrhW
|
||||
wufEIz1vUU9B6pqUfQPsAzdtPNivF8eRDTpAbyGIYXRgdx47NiFtQqhuB68WsQVDPg66G3mdJ/6v
|
||||
Cv/rsSw/eSueVpDrEUV3E4ybLMhDApP4J7TLq+W7Uoeb8+/P4Y52pHMB8tOLAPS5xyKOR1WM6T8j
|
||||
G8rMCchH1kwSthxbQKjFDFmxtcQWOnYcUXcQomKkhilknmltO3To0wdImbyhAOTtoGQTpCDOcvpK
|
||||
etvwhh2jD4COnwvx1yNx5+BfA1OcTPJAIZJ6+KjiY6H3Qdm0cIfKCPfUof4x0Ez0GnrhxqOP4GVD
|
||||
rghqsI+DUiGpFNhliAW74tDhAfAM+IY00Zlx2yELPrsVvjuFNQayE1AcYiupxLI9qkJmd7G39Veu
|
||||
L5S+l9bDD0QN+plJeYReZcM21RksReTkzVmGNvl974pszpowDsfuTPJhjI7C6YFyp7N0yQDoHHDX
|
||||
o8mlb5ysk4frGlkLhzeFw42IXe8oOS6J9anFUZZ7WZNGuFHcUDg+eThEWbM0in27AyUjg4/huGyH
|
||||
xsHEdiz9VPm/Hj6EnjSIH8+a0fHgpdGeTPb2KI4RXzs2ce/WXkI8Sz4nxUzvbaH3HiOGqClE8sla
|
||||
TalzI8k+stfsDj/DLYbIvtnTnQbAtCqejRs9RxtKjB2hTdCHPm+1N9OMq8TDrFaR/ID/i5Q1HFOq
|
||||
7NUqELNdDVPk0XWXe9eljySp4IO3ZOFHQjPGyr2gHePjYiKmDSpFuENzx84deHIeSXI5mU7UcegV
|
||||
U+bHo1RNF4zyeojfTtcsLnV9i4H/OzG2ZDiw+AAdWkroUglzkBiVEA6z893fHrIVXMP7chMT3JYz
|
||||
M3ON7OGGXTLvsWvNtGI6ZRGfKYCOAGKrMjTtQW7Grw6iSgccAxgcQjFPesI6pmyJ7SkMofh4y5ZA
|
||||
0TeUNggy6BQcV4VTcWJi9guhlmDM5LKchcqPODi4xe7Ai12Jb5qv3J43EtHBLu0idbk7jq1XPGZp
|
||||
gyFialxSTKDbZUkSfie+OXPF50B1TSbO+28TqAx8uSjIk/FuRQON9WbTIjm4E+0HbWQGO5a8ZWeV
|
||||
/IswpTc5Vzw2Bx1Cm/abrHYUHF9nRctN61IGTqNr8RbWFLdEHtBzh65kQr69AmDHNsSSGS1KEeED
|
||||
mnZUNsw9TEqhAIPnPwY6jJyyXb6iyuENgGsZ4rdanlPo8HmE1sEG3ZC7JKXRAlCLAvqdeEodBmm6
|
||||
DG1qemJiPqmZ+EseHp16ftjXKdVDwNRb+sG5gwn0XorEuaP8bZz5MveQTppeZR2+WlrV7Dm0T0oY
|
||||
xKd+MTVdVZ79cgLwW+5Vh6P2s+pVuj4+RXmm/Lnl5dirVvsWeT/75vXFOBsTw/3E5WKaONrwaSz1
|
||||
QbtbGTQt2f3SfW+Mg2U5mDg5oP1XON/au1Bn3/w/2+8njKE+kn3qlSybY8r715TST4i/e20ucwZc
|
||||
BdING3qKTJqksFTj4EpjX4VdiNQ91ewb0l65dPd1/2TXaPDtwtaL6uTLyf8AAAD//wMA10Lu/OsM
|
||||
AAA=
|
||||
headers:
|
||||
CF-RAY:
|
||||
- 937ed42dee2e621f-GRU
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Encoding:
|
||||
- gzip
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Tue, 29 Apr 2025 12:33:48 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Set-Cookie:
|
||||
- __cf_bm=mLRCnpdB3n_6medIZWHnUu8MNRGZsD6riaRhN47PK74-1745930028-1.0.1.1-M2lDM1_V9hNCK0MZrBnFalF3lndC3JkS8zhDOGww_LmOrgdpU9fZLpNZUmyinCQOnlCjDjDYJUECM82ffT1anqBiO1NoDeNp91EPKiK7s.8;
|
||||
path=/; expires=Tue, 29-Apr-25 13:03:48 GMT; domain=.api.openai.com; HttpOnly;
|
||||
Secure; SameSite=None
|
||||
- _cfuvid=eTrj_ZhCx2XuylS5vYROwUlPrJBwOyrbS2Ki.msl45E-1745930028010-0.0.1.1-604800000;
|
||||
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- nosniff
|
||||
access-control-expose-headers:
|
||||
- X-Request-ID
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
cf-cache-status:
|
||||
- DYNAMIC
|
||||
openai-organization:
|
||||
- crewai-iuxna1
|
||||
openai-processing-ms:
|
||||
- '10856'
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
strict-transport-security:
|
||||
- max-age=31536000; includeSubDomains; preload
|
||||
x-ratelimit-limit-requests:
|
||||
- '30000'
|
||||
x-ratelimit-limit-tokens:
|
||||
- '150000000'
|
||||
x-ratelimit-remaining-requests:
|
||||
- '29999'
|
||||
x-ratelimit-remaining-tokens:
|
||||
- '149999807'
|
||||
x-ratelimit-reset-requests:
|
||||
- 2ms
|
||||
x-ratelimit-reset-tokens:
|
||||
- 0s
|
||||
x-request-id:
|
||||
- req_bc2d62d8325b2bdd3e98544a66389132
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
- request:
|
||||
body: '{"messages": [{"role": "system", "content": "You are Guardrail Agent. You
|
||||
are a expert at validating the output of a task. By providing effective feedback
|
||||
if the output is not valid.\nYour personal goal is: Validate the output of the
|
||||
task\n\nTo give my best complete final answer to the task respond using the
|
||||
exact following format:\n\nThought: I now can give a great answer\nFinal Answer:
|
||||
Your final answer must be the great and the most complete as possible, it must
|
||||
be outcome described.\n\nI MUST use these formats, my job depends on it!\nIMPORTANT:
|
||||
Your final answer MUST contain all the information requested in the following
|
||||
format: {\n \"valid\": bool,\n \"feedback\": str | None\n}\n\nIMPORTANT: Ensure
|
||||
the final output does not include any code block markers like ```json or ```python."},
|
||||
{"role": "user", "content": "\n Ensure the following task result complies
|
||||
with the given guardrail.\n\n Task result:\n Here is a comprehensive
|
||||
list of available books on the First World War:\n\n1. **\"The Sleepwalkers:
|
||||
How Europe Went to War in 1914\" by Christopher Clark** \n This book delves
|
||||
into the complex factors that led to the outbreak of the war, offering insights
|
||||
into the political and social dynamics of early 20th century Europe.\n\n2. **\"A
|
||||
World Undone: The Story of the Great War, 1914 to 1918\" by G.J. Meyer** \n Meyer''s
|
||||
expansive narrative covers the entire war with a focus on both military strategies
|
||||
and the human experiences endured by soldiers and civilians alike.\n\n3. **\"All
|
||||
Quiet on the Western Front\" by Erich Maria Remarque** \n A poignant novel
|
||||
that captures the resilience and trauma experienced by German soldiers during
|
||||
World War I, based on the author''s own experiences.\n\n4. **\"The First World
|
||||
War\" by John Keegan** \n Keegan provides a detailed military history of
|
||||
the war, featuring insights on battles, strategies, and the overall impact on
|
||||
global affairs.\n\n5. **\"Goodbye to All That\" by Robert Graves** \n This
|
||||
autobiography recounts the author''s experiences as a soldier during the war,
|
||||
offering a personal and critical perspective on the conflicts and the post-war
|
||||
era.\n\n6. **\"Catastrophe 1914: Europe Goes to War\" by Max Hastings** \n Hastings
|
||||
chronicles the events leading up to World War I and the early battles, detailing
|
||||
the war''s initial impact on European societies.\n\n7. **\"The War That Ended
|
||||
Peace: The Road to 1914\" by Margaret MacMillan** \n MacMillan explores the
|
||||
political and historical factors that contributed to the outbreak of war, emphasizing
|
||||
the decisions made by leaders across Europe.\n\n8. **\"The First World War:
|
||||
A Complete History\" by Martin Gilbert** \n This complete history takes readers
|
||||
through the entirety of the war, from its causes to its aftermath, using a wide
|
||||
range of sources.\n\n9. **\"1914: The Year the World Ended\" by Paul Ham** \n Ham
|
||||
focuses on the pivotal year of 1914 and the early war''s devastation, analyzing
|
||||
its long-lasting effects on the world.\n\n10. **\"War Horse\" by Michael Morpurgo** \n This
|
||||
children''s novel tells the story of a horse and his experiences during the
|
||||
war, highlighting the bond between animals and humans amidst the chaos.\n\nEach
|
||||
of these books offers unique perspectives and rich details about the First World
|
||||
War, making them valuable resources for anyone interested in this pivotal period
|
||||
in history.\n\n Guardrail:\n Ensure the authors are from Italy\n \n Your
|
||||
task:\n - Confirm if the Task result complies with the guardrail.\n -
|
||||
If not, provide clear feedback explaining what is wrong (e.g., by how much it
|
||||
violates the rule, or what specific part fails).\n - Focus only on identifying
|
||||
issues \u2014 do not propose corrections.\n - If the Task result complies
|
||||
with the guardrail, saying that is valid\n "}], "model": "gpt-4o-mini",
|
||||
"stop": ["\nObservation:"]}'
|
||||
headers:
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- gzip, deflate
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '3917'
|
||||
content-type:
|
||||
- application/json
|
||||
cookie:
|
||||
- __cf_bm=mLRCnpdB3n_6medIZWHnUu8MNRGZsD6riaRhN47PK74-1745930028-1.0.1.1-M2lDM1_V9hNCK0MZrBnFalF3lndC3JkS8zhDOGww_LmOrgdpU9fZLpNZUmyinCQOnlCjDjDYJUECM82ffT1anqBiO1NoDeNp91EPKiK7s.8;
|
||||
_cfuvid=eTrj_ZhCx2XuylS5vYROwUlPrJBwOyrbS2Ki.msl45E-1745930028010-0.0.1.1-604800000
|
||||
host:
|
||||
- api.openai.com
|
||||
user-agent:
|
||||
- OpenAI/Python 1.68.2
|
||||
x-stainless-arch:
|
||||
- arm64
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- MacOS
|
||||
x-stainless-package-version:
|
||||
- 1.68.2
|
||||
x-stainless-raw-response:
|
||||
- 'true'
|
||||
x-stainless-read-timeout:
|
||||
- '600.0'
|
||||
x-stainless-retry-count:
|
||||
- '0'
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.12.9
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
body:
|
||||
string: !!binary |
|
||||
H4sIAAAAAAAAAwAAAP//jFPLbtswELz7KxY8y4HsJHWsW9wiQVq0hzx6aBUIa3IlMaFIlVw5NQL/
|
||||
e0Epiew+gF4EamZnuC8+TwCEViIDIWtk2bRmurqm29X1+/PNzYfL06+fFV8svz2sttuF+XHzUSRR
|
||||
4dYPJPlVdSRd0xpi7exAS0/IFF1ni5PT5XE6m896onGKTJRVLU9P3LTRVk/n6fxkmi6ms7MXde20
|
||||
pCAy+D4BAHjuvzFPq+inyCBNXpGGQsCKRPYWBCC8MxERGIIOjJZFMpLSWSbbp35bu66qOYMrsO4J
|
||||
JFqo9IYAoYr5A9rwRB4gtxfaooHz/j+D59wC5GKDRqtcZFCiCZQMYEmk1igfI56LL84SuBK4JsCO
|
||||
a+cDGB2YFGjbo4zhETyFzjCgJyi9a+CK0WyP4NyYA2VDNraY1BjpuCYP0nWWvaaQQOhkDRjgknyD
|
||||
dpv0BnefEkCrhvPNUS5yu9vviaeyCxjnYjtj9gi01jHGS/tp3L8wu7f+G1e13q3Db1JRaqtDXXjC
|
||||
4GzsdWDXip7dTQDu+zl3B6MTrXdNywW7R+qvWywXg58Y12tk370sgWDHaEb87PRVdeBXKGLUJuxt
|
||||
ipAoa1KjdFwr7JR2e8Rkr+o/s/mb91C5ttX/2I+ElNQyqaL1pLQ8rHgM8xRf37/C3rrcJywC+Y2W
|
||||
VLAmHyehqMTODG9ChG1gaopS24p86/XwMMq2SI+X87P5PF2mYrKb/AIAAP//AwD77a3iJgQAAA==
|
||||
headers:
|
||||
CF-RAY:
|
||||
- 937ed6bd68faa435-GRU
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Encoding:
|
||||
- gzip
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Tue, 29 Apr 2025 12:35:23 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- nosniff
|
||||
access-control-expose-headers:
|
||||
- X-Request-ID
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
cf-cache-status:
|
||||
- DYNAMIC
|
||||
openai-organization:
|
||||
- crewai-iuxna1
|
||||
openai-processing-ms:
|
||||
- '1138'
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
strict-transport-security:
|
||||
- max-age=31536000; includeSubDomains; preload
|
||||
x-ratelimit-limit-requests:
|
||||
- '30000'
|
||||
x-ratelimit-limit-tokens:
|
||||
- '150000000'
|
||||
x-ratelimit-remaining-requests:
|
||||
- '29999'
|
||||
x-ratelimit-remaining-tokens:
|
||||
- '149999072'
|
||||
x-ratelimit-reset-requests:
|
||||
- 2ms
|
||||
x-ratelimit-reset-tokens:
|
||||
- 0s
|
||||
x-request-id:
|
||||
- req_2ba1be014a5974ba354aff564e26516a
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
version: 1
|
||||
File diff suppressed because it is too large
Load Diff
250
tests/cassettes/test_task_guardrail_process_output.yaml
Normal file
250
tests/cassettes/test_task_guardrail_process_output.yaml
Normal file
@@ -0,0 +1,250 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: '{"messages": [{"role": "system", "content": "You are Guardrail Agent. You
|
||||
are a expert at validating the output of a task. By providing effective feedback
|
||||
if the output is not valid.\nYour personal goal is: Validate the output of the
|
||||
task\n\nTo give my best complete final answer to the task respond using the
|
||||
exact following format:\n\nThought: I now can give a great answer\nFinal Answer:
|
||||
Your final answer must be the great and the most complete as possible, it must
|
||||
be outcome described.\n\nI MUST use these formats, my job depends on it!\nIMPORTANT:
|
||||
Your final answer MUST contain all the information requested in the following
|
||||
format: {\n \"valid\": bool,\n \"feedback\": str | None\n}\n\nIMPORTANT: Ensure
|
||||
the final output does not include any code block markers like ```json or ```python."},
|
||||
{"role": "user", "content": "\n Ensure the following task result complies
|
||||
with the given guardrail.\n\n Task result:\n \n Lorem Ipsum
|
||||
is simply dummy text of the printing and typesetting industry. Lorem Ipsum has
|
||||
been the industry''s standard dummy text ever\n \n\n Guardrail:\n Ensure
|
||||
the result has less than 10 words\n \n Your task:\n - Confirm
|
||||
if the Task result complies with the guardrail.\n - If not, provide clear
|
||||
feedback explaining what is wrong (e.g., by how much it violates the rule, or
|
||||
what specific part fails).\n - Focus only on identifying issues \u2014
|
||||
do not propose corrections.\n - If the Task result complies with the
|
||||
guardrail, saying that is valid\n "}], "model": "gpt-4o-mini", "stop":
|
||||
["\nObservation:"]}'
|
||||
headers:
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- gzip, deflate
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '1629'
|
||||
content-type:
|
||||
- application/json
|
||||
host:
|
||||
- api.openai.com
|
||||
user-agent:
|
||||
- OpenAI/Python 1.68.2
|
||||
x-stainless-arch:
|
||||
- arm64
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- MacOS
|
||||
x-stainless-package-version:
|
||||
- 1.68.2
|
||||
x-stainless-raw-response:
|
||||
- 'true'
|
||||
x-stainless-read-timeout:
|
||||
- '600.0'
|
||||
x-stainless-retry-count:
|
||||
- '0'
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.12.9
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
body:
|
||||
string: !!binary |
|
||||
H4sIAAAAAAAAAwAAAP//jFPLbtswELz7KxY824GkxLGtW4KiQB+XBmkRtAqENbmSmFAkQVJ2UsP/
|
||||
HlByLKdNgV4IcGdnOPvgbgLApGA5MN5g4K1Vs+ub718rm324+5z+/CLt1dXD5fU3s1jd3Wx//GbT
|
||||
yDDrB+LhlXXGTWsVBWn0AHNHGCiqpouL+XKVZum8B1ojSEVabcPswsxaqeUsS7KLWbKYpcsDuzGS
|
||||
k2c5/JoAAOz6M/rUgp5YDsn0NdKS91gTy49JAMwZFSMMvZc+oA5sOoLc6EC6t37bmK5uQg6fQJst
|
||||
cNRQyw0BQh39A2q/JQdQ6I9So4Kr/p7DrtAABdugkqJgOVSoPE2HYEUk1sgfY7xgtw1BQP8Ijnyn
|
||||
AsTHUWoP6SVsjRN+CvTEiYTUNYSGoO7QCYdSgZKtDGAqqCiaCA1qSJOBBetnOAicFazQ+9MCHVWd
|
||||
x9hk3Sl1AqDWJmAcUt/a+wOyPzZTmdo6s/Z/UFkltfRN6Qi90bFxPhjLenQ/Abjvh9a9mQOzzrQ2
|
||||
lME8Uv/ceTIf9Ni4KyM6Tw9gMAHVCWt+OX1HrxQUUCp/MnbGkTckRuq4I9gJaU6AyUnVf7t5T3uo
|
||||
XOr6f+RHgHOygURpHQnJ31Y8pjmKX+lfaccu94aZJ7eRnMogycVJCKqwU8OCM//sA7VlJXVNzjo5
|
||||
bHlly+R8lS2zLFklbLKfvAAAAP//AwCHe/Jh8wMAAA==
|
||||
headers:
|
||||
CF-RAY:
|
||||
- 937b20ddf9607def-GRU
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Encoding:
|
||||
- gzip
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Tue, 29 Apr 2025 01:46:56 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Set-Cookie:
|
||||
- __cf_bm=nHa2kVJI_yO1RIsmZcEednJ1e9UVy1liv_sjBNtSj7Q-1745891216-1.0.1.1-jUH9kFawVBjnbq8sIL2.MQx.p7JvBZWUhqlkNKRlStWSgQxT0eZMPcgq9TCQoJAjuyNwhqfpK4HuX6x5n8UbQgAb6JrWJEG823e6GpGROEA;
|
||||
path=/; expires=Tue, 29-Apr-25 02:16:56 GMT; domain=.api.openai.com; HttpOnly;
|
||||
Secure; SameSite=None
|
||||
- _cfuvid=gg2UeahMCOOR8YhitRtzDwENMOnTOuQdyTMVJVHG0Mg-1745891216085-0.0.1.1-604800000;
|
||||
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- nosniff
|
||||
access-control-expose-headers:
|
||||
- X-Request-ID
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
cf-cache-status:
|
||||
- DYNAMIC
|
||||
openai-organization:
|
||||
- crewai-iuxna1
|
||||
openai-processing-ms:
|
||||
- '896'
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
strict-transport-security:
|
||||
- max-age=31536000; includeSubDomains; preload
|
||||
x-ratelimit-limit-requests:
|
||||
- '30000'
|
||||
x-ratelimit-limit-tokens:
|
||||
- '150000000'
|
||||
x-ratelimit-remaining-requests:
|
||||
- '29999'
|
||||
x-ratelimit-remaining-tokens:
|
||||
- '149999631'
|
||||
x-ratelimit-reset-requests:
|
||||
- 2ms
|
||||
x-ratelimit-reset-tokens:
|
||||
- 0s
|
||||
x-request-id:
|
||||
- req_859221ed1aedb26cc9d335004ccf183e
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
- request:
|
||||
body: '{"messages": [{"role": "system", "content": "You are Guardrail Agent. You
|
||||
are a expert at validating the output of a task. By providing effective feedback
|
||||
if the output is not valid.\nYour personal goal is: Validate the output of the
|
||||
task\n\nTo give my best complete final answer to the task respond using the
|
||||
exact following format:\n\nThought: I now can give a great answer\nFinal Answer:
|
||||
Your final answer must be the great and the most complete as possible, it must
|
||||
be outcome described.\n\nI MUST use these formats, my job depends on it!\nIMPORTANT:
|
||||
Your final answer MUST contain all the information requested in the following
|
||||
format: {\n \"valid\": bool,\n \"feedback\": str | None\n}\n\nIMPORTANT: Ensure
|
||||
the final output does not include any code block markers like ```json or ```python."},
|
||||
{"role": "user", "content": "\n Ensure the following task result complies
|
||||
with the given guardrail.\n\n Task result:\n \n Lorem Ipsum
|
||||
is simply dummy text of the printing and typesetting industry. Lorem Ipsum has
|
||||
been the industry''s standard dummy text ever\n \n\n Guardrail:\n Ensure
|
||||
the result has less than 500 words\n \n Your task:\n -
|
||||
Confirm if the Task result complies with the guardrail.\n - If not, provide
|
||||
clear feedback explaining what is wrong (e.g., by how much it violates the rule,
|
||||
or what specific part fails).\n - Focus only on identifying issues \u2014
|
||||
do not propose corrections.\n - If the Task result complies with the
|
||||
guardrail, saying that is valid\n "}], "model": "gpt-4o-mini", "stop":
|
||||
["\nObservation:"]}'
|
||||
headers:
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- gzip, deflate
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '1630'
|
||||
content-type:
|
||||
- application/json
|
||||
cookie:
|
||||
- __cf_bm=nHa2kVJI_yO1RIsmZcEednJ1e9UVy1liv_sjBNtSj7Q-1745891216-1.0.1.1-jUH9kFawVBjnbq8sIL2.MQx.p7JvBZWUhqlkNKRlStWSgQxT0eZMPcgq9TCQoJAjuyNwhqfpK4HuX6x5n8UbQgAb6JrWJEG823e6GpGROEA;
|
||||
_cfuvid=gg2UeahMCOOR8YhitRtzDwENMOnTOuQdyTMVJVHG0Mg-1745891216085-0.0.1.1-604800000
|
||||
host:
|
||||
- api.openai.com
|
||||
user-agent:
|
||||
- OpenAI/Python 1.68.2
|
||||
x-stainless-arch:
|
||||
- arm64
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- MacOS
|
||||
x-stainless-package-version:
|
||||
- 1.68.2
|
||||
x-stainless-raw-response:
|
||||
- 'true'
|
||||
x-stainless-read-timeout:
|
||||
- '600.0'
|
||||
x-stainless-retry-count:
|
||||
- '0'
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.12.9
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
body:
|
||||
string: !!binary |
|
||||
H4sIAAAAAAAAAwAAAP//jJJNb9swDIbv/hWEzvHgfHRpfesOG3opsGE7LYXBSLStRZY0iU43BPnv
|
||||
g5wPu10H7GLAfPhSfEkeMgChlShByBZZdt7kH758e1wzbnfbO6o/f1osV3T/+BO7UNNDIWZJ4bY/
|
||||
SPJF9U66zhti7ewJy0DIlKrO16ub27v5srgZQOcUmSRrPOcrl3fa6nxRLFZ5sc7nt2d167SkKEr4
|
||||
ngEAHIZv6tMq+iVKKGaXSEcxYkOivCYBiOBMigiMUUdGy2I2Qukskx1a/9q6vmm5hAew7hkkWmj0
|
||||
ngChSf0D2vhMAWBjP2qLBu6H/xIOGwuwEXs0Wm1ECRx6mp1iNZHaotylsO2N2djj9PFAdR/RnOEE
|
||||
oLWOMQ1wsP10JserUeMaH9w2vpKKWlsd2yoQRmeTqcjOi4EeM4CnYaD9ixkJH1znuWK3o+G583KG
|
||||
4Vz2ONLF7RmyYzQT1XI5e6NepYhRmzhZiZAoW1KjdNwf9kq7Ccgmrv/u5q3aJ+faNv9TfgRSkmdS
|
||||
lQ+ktHzpeEwLlM78X2nXKQ8Ni0hhryVVrCmkTSiqsTen4xPxd2TqqlrbhoIP+nSBta/SueD7QtWF
|
||||
yI7ZHwAAAP//AwAiLXhqjwMAAA==
|
||||
headers:
|
||||
CF-RAY:
|
||||
- 937b2311ee091b1b-GRU
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Encoding:
|
||||
- gzip
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Tue, 29 Apr 2025 01:48:26 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- nosniff
|
||||
access-control-expose-headers:
|
||||
- X-Request-ID
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
cf-cache-status:
|
||||
- DYNAMIC
|
||||
openai-organization:
|
||||
- crewai-iuxna1
|
||||
openai-processing-ms:
|
||||
- '610'
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
strict-transport-security:
|
||||
- max-age=31536000; includeSubDomains; preload
|
||||
x-ratelimit-limit-requests:
|
||||
- '30000'
|
||||
x-ratelimit-limit-tokens:
|
||||
- '150000000'
|
||||
x-ratelimit-remaining-requests:
|
||||
- '29999'
|
||||
x-ratelimit-remaining-tokens:
|
||||
- '149999631'
|
||||
x-ratelimit-reset-requests:
|
||||
- 2ms
|
||||
x-ratelimit-reset-tokens:
|
||||
- 0s
|
||||
x-request-id:
|
||||
- req_c136835c16be6bc1e4d820f239c4b620
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
version: 1
|
||||
@@ -6,7 +6,7 @@ research_task:
|
||||
expected_output: >
|
||||
A list with 10 bullet points of the most relevant information about {topic}
|
||||
agent: researcher
|
||||
guardrail: make sure each bullet contains a minimum of 100 words
|
||||
guardrail: ensure each bullet contains its source
|
||||
|
||||
reporting_task:
|
||||
description: >
|
||||
|
||||
@@ -143,31 +143,17 @@ def test_agent_function_calling_llm():
|
||||
), "agent's function_calling_llm is incorrect"
|
||||
|
||||
|
||||
# VCR could not record the request to localhost from Docker to get it version, so we need to mock the tool.
|
||||
# TODO: We can remove this mock after some issue such as https://github.com/kevin1024/vcrpy/issues/519 been addressed
|
||||
@pytest.fixture
|
||||
def code_interpreter_tool_mock():
|
||||
with patch(
|
||||
"crewai_tools.tools.code_interpreter_tool.code_interpreter_tool.CodeInterpreterTool._run",
|
||||
return_value="(True, 'good result')",
|
||||
):
|
||||
yield
|
||||
|
||||
|
||||
def test_task_guardrail(code_interpreter_tool_mock):
|
||||
def test_task_guardrail():
|
||||
crew = InternalCrew()
|
||||
research_task = crew.research_task()
|
||||
assert (
|
||||
research_task.guardrail
|
||||
== "make sure each bullet contains a minimum of 100 words"
|
||||
)
|
||||
assert research_task.guardrail == "ensure each bullet contains its source"
|
||||
|
||||
reporting_task = crew.reporting_task()
|
||||
assert reporting_task.guardrail is None
|
||||
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_before_kickoff_modification(code_interpreter_tool_mock):
|
||||
def test_before_kickoff_modification():
|
||||
crew = InternalCrew()
|
||||
inputs = {"topic": "LLMs"}
|
||||
result = crew.crew().kickoff(inputs=inputs)
|
||||
@@ -175,7 +161,7 @@ def test_before_kickoff_modification(code_interpreter_tool_mock):
|
||||
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_after_kickoff_modification(code_interpreter_tool_mock):
|
||||
def test_after_kickoff_modification():
|
||||
crew = InternalCrew()
|
||||
# Assuming the crew execution returns a dict
|
||||
result = crew.crew().kickoff({"topic": "LLMs"})
|
||||
@@ -186,14 +172,14 @@ def test_after_kickoff_modification(code_interpreter_tool_mock):
|
||||
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_before_kickoff_with_none_input(code_interpreter_tool_mock):
|
||||
def test_before_kickoff_with_none_input():
|
||||
crew = InternalCrew()
|
||||
crew.crew().kickoff(None)
|
||||
# Test should pass without raising exceptions
|
||||
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_multiple_before_after_kickoff(code_interpreter_tool_mock):
|
||||
def test_multiple_before_after_kickoff():
|
||||
@CrewBase
|
||||
class MultipleHooksCrew:
|
||||
agents: List[BaseAgent]
|
||||
|
||||
@@ -139,127 +139,32 @@ def sample_agent():
|
||||
return Agent(role="Test Agent", goal="Test Goal", backstory="Test Backstory")
|
||||
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_guardrail_using_llm(sample_agent):
|
||||
task = Task(
|
||||
description="Test task",
|
||||
expected_output="Output",
|
||||
guardrail="Ensure the output is equal to 'good result'",
|
||||
)
|
||||
|
||||
with patch(
|
||||
"crewai.tasks.task_guardrail.TaskGuardrail.__call__",
|
||||
side_effect=[(False, "bad result"), (True, "good result")],
|
||||
) as mock_guardrail:
|
||||
task.execute_sync(agent=sample_agent)
|
||||
|
||||
assert mock_guardrail.call_count == 2
|
||||
|
||||
task.guardrail = TaskGuardrail(
|
||||
description="Ensure the output is equal to 'good result'",
|
||||
llm=LLM(model="gpt-4o-mini"),
|
||||
)
|
||||
|
||||
with patch(
|
||||
"crewai.tasks.task_guardrail.TaskGuardrail.__call__",
|
||||
side_effect=[(False, "bad result"), (True, "good result")],
|
||||
) as mock_guardrail:
|
||||
task.execute_sync(agent=sample_agent)
|
||||
|
||||
assert mock_guardrail.call_count == 2
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def task_output():
|
||||
return TaskOutput(
|
||||
raw="Test output",
|
||||
raw="""
|
||||
Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever
|
||||
""",
|
||||
description="Test task",
|
||||
expected_output="Output",
|
||||
agent="Test Agent",
|
||||
)
|
||||
|
||||
|
||||
def test_task_guardrail_initialization_no_llm(task_output):
|
||||
"""Test TaskGuardrail initialization fails without LLM"""
|
||||
with pytest.raises(ValueError, match="Provide a valid LLM to the TaskGuardrail"):
|
||||
TaskGuardrail(description="Test")(task_output)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_llm():
|
||||
llm = Mock(spec=LLM)
|
||||
llm.call.return_value = """
|
||||
output = 'Sample book data'
|
||||
if isinstance(output, str):
|
||||
result = (True, output)
|
||||
else:
|
||||
result = (False, 'Invalid output format')
|
||||
print(result)
|
||||
"""
|
||||
return llm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"tool_run_output",
|
||||
[
|
||||
{
|
||||
"output": "(True, 'Valid output')",
|
||||
"expected_result": True,
|
||||
"expected_output": "Valid output",
|
||||
},
|
||||
{
|
||||
"output": "(False, 'Invalid output format')",
|
||||
"expected_result": False,
|
||||
"expected_output": "Invalid output format",
|
||||
},
|
||||
{
|
||||
"output": "Something went wrong while running the code, Invalid output format",
|
||||
"expected_result": False,
|
||||
"expected_output": "Something went wrong while running the code, Invalid output format",
|
||||
},
|
||||
{
|
||||
"output": "No result variable found",
|
||||
"expected_result": False,
|
||||
"expected_output": "No result variable found",
|
||||
},
|
||||
{
|
||||
"output": (False, "Invalid output format"),
|
||||
"expected_result": False,
|
||||
"expected_output": "Invalid output format",
|
||||
},
|
||||
{
|
||||
"output": "bla-bla-bla",
|
||||
"expected_result": False,
|
||||
"expected_output": "Error parsing result: malformed node or string on line 1",
|
||||
},
|
||||
],
|
||||
)
|
||||
@patch("crewai_tools.CodeInterpreterTool.run")
|
||||
def test_task_guardrail_execute_code(mock_run, mock_llm, tool_run_output, task_output):
|
||||
mock_run.return_value = tool_run_output["output"]
|
||||
|
||||
guardrail = TaskGuardrail(description="Test validation", llm=mock_llm)
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_task_guardrail_process_output(task_output):
|
||||
guardrail = TaskGuardrail(description="Ensure the result has less than 10 words")
|
||||
|
||||
result = guardrail(task_output)
|
||||
assert result[0] == tool_run_output["expected_result"]
|
||||
assert tool_run_output["expected_output"] in result[1]
|
||||
assert result[0] is False
|
||||
|
||||
assert "exceeding the guardrail limit of fewer than" in result[1].lower()
|
||||
|
||||
@patch("crewai_tools.CodeInterpreterTool.run")
|
||||
def test_guardrail_using_additional_instructions(mock_run, mock_llm, task_output):
|
||||
mock_run.return_value = "(True, 'Valid output')"
|
||||
additional_instructions = (
|
||||
"This is an additional instruction created by the user follow it strictly"
|
||||
)
|
||||
guardrail = TaskGuardrail(
|
||||
description="Test validation",
|
||||
llm=mock_llm,
|
||||
additional_instructions=additional_instructions,
|
||||
)
|
||||
guardrail = TaskGuardrail(description="Ensure the result has less than 500 words")
|
||||
|
||||
guardrail(task_output)
|
||||
|
||||
assert additional_instructions in str(mock_llm.call.call_args)
|
||||
result = guardrail(task_output)
|
||||
assert result[0] is True
|
||||
assert result[1] == task_output.raw
|
||||
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
@@ -287,26 +192,13 @@ def test_guardrail_emits_events(sample_agent):
|
||||
)
|
||||
|
||||
task = Task(
|
||||
description="Test task",
|
||||
expected_output="Output",
|
||||
guardrail="Ensure the output is equal to 'good result'",
|
||||
description="Gather information about available books on the First World War",
|
||||
agent=sample_agent,
|
||||
expected_output="A list of available books on the First World War",
|
||||
guardrail="Ensure the authors are from Italy",
|
||||
)
|
||||
|
||||
with (
|
||||
patch(
|
||||
"crewai_tools.CodeInterpreterTool.run",
|
||||
side_effect=[
|
||||
"Something went wrong while running the code",
|
||||
(True, "good result"),
|
||||
],
|
||||
),
|
||||
patch(
|
||||
"crewai.tasks.task_guardrail.TaskGuardrail.generate_code",
|
||||
return_value="""def guardrail(result: TaskOutput):
|
||||
return (True, result.raw.upper())""",
|
||||
),
|
||||
):
|
||||
task.execute_sync(agent=sample_agent)
|
||||
result = task.execute_sync(agent=sample_agent)
|
||||
|
||||
def custom_guardrail(result: TaskOutput):
|
||||
return (True, "good result from callable function")
|
||||
@@ -320,35 +212,26 @@ def test_guardrail_emits_events(sample_agent):
|
||||
task.execute_sync(agent=sample_agent)
|
||||
|
||||
expected_started_events = [
|
||||
{
|
||||
"guardrail": """def guardrail(result: TaskOutput):
|
||||
return (True, result.raw.upper())""",
|
||||
"retry_count": 0,
|
||||
},
|
||||
{
|
||||
"guardrail": """def guardrail(result: TaskOutput):
|
||||
return (True, result.raw.upper())""",
|
||||
"retry_count": 1,
|
||||
},
|
||||
{"guardrail": "Ensure the authors are from Italy", "retry_count": 0},
|
||||
{"guardrail": "Ensure the authors are from Italy", "retry_count": 1},
|
||||
{
|
||||
"guardrail": """def custom_guardrail(result: TaskOutput):
|
||||
return (True, "good result from callable function")""",
|
||||
"retry_count": 0,
|
||||
},
|
||||
]
|
||||
|
||||
expected_completed_events = [
|
||||
{
|
||||
"success": False,
|
||||
"result": None,
|
||||
"error": "Something went wrong while running the code",
|
||||
"error": "The task result does not comply with the guardrail because none of "
|
||||
"the listed authors are from Italy. All authors mentioned are from "
|
||||
"different countries, including Germany, the UK, the USA, and others, "
|
||||
"which violates the requirement that authors must be Italian.",
|
||||
"retry_count": 0,
|
||||
},
|
||||
{
|
||||
"success": True,
|
||||
"result": "good result",
|
||||
"error": None,
|
||||
"retry_count": 1,
|
||||
},
|
||||
{"success": True, "result": result.raw, "error": None, "retry_count": 1},
|
||||
{
|
||||
"success": True,
|
||||
"result": "good result from callable function",
|
||||
@@ -360,20 +243,23 @@ def test_guardrail_emits_events(sample_agent):
|
||||
assert completed_guardrail == expected_completed_events
|
||||
|
||||
|
||||
@pytest.mark.parametrize("unsafe_mode", [True, False])
|
||||
def test_task_guardrail_force_code_tool_unsafe_mode(mock_llm, task_output, unsafe_mode):
|
||||
guardrail = TaskGuardrail(
|
||||
description="Test validation", llm=mock_llm, unsafe_mode=unsafe_mode
|
||||
)
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_guardrail_when_an_error_occurs(sample_agent, task_output):
|
||||
with (
|
||||
patch(
|
||||
"crewai_tools.CodeInterpreterTool.__init__", return_value=None
|
||||
) as mock_init,
|
||||
patch(
|
||||
"crewai_tools.CodeInterpreterTool.run", return_value=(True, "Valid output")
|
||||
"crewai.Agent.kickoff",
|
||||
side_effect=Exception("Unexpected error"),
|
||||
),
|
||||
pytest.raises(
|
||||
Exception,
|
||||
match="Error while validating the task output: Unexpected error",
|
||||
),
|
||||
):
|
||||
result = guardrail(task_output)
|
||||
|
||||
mock_init.assert_called_once_with(code=ANY, unsafe_mode=unsafe_mode)
|
||||
assert result == (True, "Valid output")
|
||||
task = Task(
|
||||
description="Gather information about available books on the First World War",
|
||||
agent=sample_agent,
|
||||
expected_output="A list of available books on the First World War",
|
||||
guardrail="Ensure the authors are from Italy",
|
||||
max_retries=0,
|
||||
)
|
||||
task.execute_sync(agent=sample_agent)
|
||||
|
||||
Reference in New Issue
Block a user