Merge branch 'main' into feat/docling

This commit is contained in:
Brandon Hancock (bhancock_ai)
2024-12-23 10:13:48 -05:00
committed by GitHub
10 changed files with 553 additions and 26 deletions

View File

@@ -26237,7 +26237,7 @@ interactions:
answer."}, {"role": "user", "content": "I did it wrong. Invalid Format: I missed
the ''Action:'' after ''Thought:''. I will do right next, and don''t use a tool
I have already used.\n\nIf you don''t need to use any more tools, you must give
your best complete final answer, make sure it satisfy the expect criteria, use
your best complete final answer, make sure it satisfies the expected criteria, use
the EXACT format below:\n\nThought: I now can give a great answer\nFinal Answer:
my best complete final answer to the task.\n\n"}], "model": "gpt-4o"}'
headers:
@@ -26590,7 +26590,7 @@ interactions:
answer."}, {"role": "user", "content": "I did it wrong. Invalid Format: I missed
the ''Action:'' after ''Thought:''. I will do right next, and don''t use a tool
I have already used.\n\nIf you don''t need to use any more tools, you must give
your best complete final answer, make sure it satisfy the expect criteria, use
your best complete final answer, make sure it satisfies the expected criteria, use
the EXACT format below:\n\nThought: I now can give a great answer\nFinal Answer:
my best complete final answer to the task.\n\n"}, {"role": "user", "content":
"I did it wrong. Tried to both perform Action and give a Final Answer at the
@@ -26941,7 +26941,7 @@ interactions:
answer."}, {"role": "user", "content": "I did it wrong. Invalid Format: I missed
the ''Action:'' after ''Thought:''. I will do right next, and don''t use a tool
I have already used.\n\nIf you don''t need to use any more tools, you must give
your best complete final answer, make sure it satisfy the expect criteria, use
your best complete final answer, make sure it satisfies the expected criteria, use
the EXACT format below:\n\nThought: I now can give a great answer\nFinal Answer:
my best complete final answer to the task.\n\n"}, {"role": "user", "content":
"I did it wrong. Tried to both perform Action and give a Final Answer at the
@@ -27292,7 +27292,7 @@ interactions:
answer."}, {"role": "user", "content": "I did it wrong. Invalid Format: I missed
the ''Action:'' after ''Thought:''. I will do right next, and don''t use a tool
I have already used.\n\nIf you don''t need to use any more tools, you must give
your best complete final answer, make sure it satisfy the expect criteria, use
your best complete final answer, make sure it satisfies the expected criteria, use
the EXACT format below:\n\nThought: I now can give a great answer\nFinal Answer:
my best complete final answer to the task.\n\n"}, {"role": "user", "content":
"I did it wrong. Tried to both perform Action and give a Final Answer at the
@@ -27647,7 +27647,7 @@ interactions:
answer."}, {"role": "user", "content": "I did it wrong. Invalid Format: I missed
the ''Action:'' after ''Thought:''. I will do right next, and don''t use a tool
I have already used.\n\nIf you don''t need to use any more tools, you must give
your best complete final answer, make sure it satisfy the expect criteria, use
your best complete final answer, make sure it satisfies the expected criteria, use
the EXACT format below:\n\nThought: I now can give a great answer\nFinal Answer:
my best complete final answer to the task.\n\n"}, {"role": "user", "content":
"I did it wrong. Tried to both perform Action and give a Final Answer at the
@@ -28005,7 +28005,7 @@ interactions:
answer."}, {"role": "user", "content": "I did it wrong. Invalid Format: I missed
the ''Action:'' after ''Thought:''. I will do right next, and don''t use a tool
I have already used.\n\nIf you don''t need to use any more tools, you must give
your best complete final answer, make sure it satisfy the expect criteria, use
your best complete final answer, make sure it satisfies the expected criteria, use
the EXACT format below:\n\nThought: I now can give a great answer\nFinal Answer:
my best complete final answer to the task.\n\n"}, {"role": "user", "content":
"I did it wrong. Tried to both perform Action and give a Final Answer at the
@@ -28364,7 +28364,7 @@ interactions:
answer."}, {"role": "user", "content": "I did it wrong. Invalid Format: I missed
the ''Action:'' after ''Thought:''. I will do right next, and don''t use a tool
I have already used.\n\nIf you don''t need to use any more tools, you must give
your best complete final answer, make sure it satisfy the expect criteria, use
your best complete final answer, make sure it satisfies the expected criteria, use
the EXACT format below:\n\nThought: I now can give a great answer\nFinal Answer:
my best complete final answer to the task.\n\n"}, {"role": "user", "content":
"I did it wrong. Tried to both perform Action and give a Final Answer at the
@@ -28718,7 +28718,7 @@ interactions:
answer."}, {"role": "user", "content": "I did it wrong. Invalid Format: I missed
the ''Action:'' after ''Thought:''. I will do right next, and don''t use a tool
I have already used.\n\nIf you don''t need to use any more tools, you must give
your best complete final answer, make sure it satisfy the expect criteria, use
your best complete final answer, make sure it satisfies the expected criteria, use
the EXACT format below:\n\nThought: I now can give a great answer\nFinal Answer:
my best complete final answer to the task.\n\n"}, {"role": "user", "content":
"I did it wrong. Tried to both perform Action and give a Final Answer at the
@@ -29082,7 +29082,7 @@ interactions:
answer."}, {"role": "user", "content": "I did it wrong. Invalid Format: I missed
the ''Action:'' after ''Thought:''. I will do right next, and don''t use a tool
I have already used.\n\nIf you don''t need to use any more tools, you must give
your best complete final answer, make sure it satisfy the expect criteria, use
your best complete final answer, make sure it satisfies the expected criteria, use
the EXACT format below:\n\nThought: I now can give a great answer\nFinal Answer:
my best complete final answer to the task.\n\n"}, {"role": "user", "content":
"I did it wrong. Tried to both perform Action and give a Final Answer at the
@@ -29441,7 +29441,7 @@ interactions:
answer."}, {"role": "user", "content": "I did it wrong. Invalid Format: I missed
the ''Action:'' after ''Thought:''. I will do right next, and don''t use a tool
I have already used.\n\nIf you don''t need to use any more tools, you must give
your best complete final answer, make sure it satisfy the expect criteria, use
your best complete final answer, make sure it satisfies the expected criteria, use
the EXACT format below:\n\nThought: I now can give a great answer\nFinal Answer:
my best complete final answer to the task.\n\n"}, {"role": "user", "content":
"I did it wrong. Tried to both perform Action and give a Final Answer at the
@@ -29802,7 +29802,7 @@ interactions:
answer."}, {"role": "user", "content": "I did it wrong. Invalid Format: I missed
the ''Action:'' after ''Thought:''. I will do right next, and don''t use a tool
I have already used.\n\nIf you don''t need to use any more tools, you must give
your best complete final answer, make sure it satisfy the expect criteria, use
your best complete final answer, make sure it satisfies the expected criteria, use
the EXACT format below:\n\nThought: I now can give a great answer\nFinal Answer:
my best complete final answer to the task.\n\n"}, {"role": "user", "content":
"I did it wrong. Tried to both perform Action and give a Final Answer at the
@@ -30170,7 +30170,7 @@ interactions:
answer."}, {"role": "user", "content": "I did it wrong. Invalid Format: I missed
the ''Action:'' after ''Thought:''. I will do right next, and don''t use a tool
I have already used.\n\nIf you don''t need to use any more tools, you must give
your best complete final answer, make sure it satisfy the expect criteria, use
your best complete final answer, make sure it satisfies the expected criteria, use
the EXACT format below:\n\nThought: I now can give a great answer\nFinal Answer:
my best complete final answer to the task.\n\n"}, {"role": "user", "content":
"I did it wrong. Tried to both perform Action and give a Final Answer at the
@@ -30533,7 +30533,7 @@ interactions:
answer."}, {"role": "user", "content": "I did it wrong. Invalid Format: I missed
the ''Action:'' after ''Thought:''. I will do right next, and don''t use a tool
I have already used.\n\nIf you don''t need to use any more tools, you must give
your best complete final answer, make sure it satisfy the expect criteria, use
your best complete final answer, make sure it satisfies the expected criteria, use
the EXACT format below:\n\nThought: I now can give a great answer\nFinal Answer:
my best complete final answer to the task.\n\n"}, {"role": "user", "content":
"I did it wrong. Tried to both perform Action and give a Final Answer at the
@@ -30907,7 +30907,7 @@ interactions:
answer."}, {"role": "user", "content": "I did it wrong. Invalid Format: I missed
the ''Action:'' after ''Thought:''. I will do right next, and don''t use a tool
I have already used.\n\nIf you don''t need to use any more tools, you must give
your best complete final answer, make sure it satisfy the expect criteria, use
your best complete final answer, make sure it satisfies the expected criteria, use
the EXACT format below:\n\nThought: I now can give a great answer\nFinal Answer:
my best complete final answer to the task.\n\n"}, {"role": "user", "content":
"I did it wrong. Tried to both perform Action and give a Final Answer at the
@@ -31273,7 +31273,7 @@ interactions:
answer."}, {"role": "user", "content": "I did it wrong. Invalid Format: I missed
the ''Action:'' after ''Thought:''. I will do right next, and don''t use a tool
I have already used.\n\nIf you don''t need to use any more tools, you must give
your best complete final answer, make sure it satisfy the expect criteria, use
your best complete final answer, make sure it satisfies the expected criteria, use
the EXACT format below:\n\nThought: I now can give a great answer\nFinal Answer:
my best complete final answer to the task.\n\n"}, {"role": "user", "content":
"I did it wrong. Tried to both perform Action and give a Final Answer at the
@@ -31644,7 +31644,7 @@ interactions:
answer."}, {"role": "user", "content": "I did it wrong. Invalid Format: I missed
the ''Action:'' after ''Thought:''. I will do right next, and don''t use a tool
I have already used.\n\nIf you don''t need to use any more tools, you must give
your best complete final answer, make sure it satisfy the expect criteria, use
your best complete final answer, make sure it satisfies the expected criteria, use
the EXACT format below:\n\nThought: I now can give a great answer\nFinal Answer:
my best complete final answer to the task.\n\n"}, {"role": "user", "content":
"I did it wrong. Tried to both perform Action and give a Final Answer at the
@@ -32015,7 +32015,7 @@ interactions:
answer."}, {"role": "user", "content": "I did it wrong. Invalid Format: I missed
the ''Action:'' after ''Thought:''. I will do right next, and don''t use a tool
I have already used.\n\nIf you don''t need to use any more tools, you must give
your best complete final answer, make sure it satisfy the expect criteria, use
your best complete final answer, make sure it satisfies the expected criteria, use
the EXACT format below:\n\nThought: I now can give a great answer\nFinal Answer:
my best complete final answer to the task.\n\n"}, {"role": "user", "content":
"I did it wrong. Tried to both perform Action and give a Final Answer at the

View File

@@ -247,7 +247,7 @@ interactions:
{"role": "user", "content": "I did it wrong. Invalid Format: I missed the ''Action:''
after ''Thought:''. I will do right next, and don''t use a tool I have already
used.\n\nIf you don''t need to use any more tools, you must give your best complete
final answer, make sure it satisfy the expect criteria, use the EXACT format
final answer, make sure it satisfies the expected criteria, use the EXACT format
below:\n\nThought: I now can give a great answer\nFinal Answer: my best complete
final answer to the task.\n\n"}], "model": "o1-preview"}'
headers:

View File

@@ -0,0 +1,134 @@
"""Tests for task guardrails functionality."""
from unittest.mock import Mock
import pytest
from crewai.task import Task
from crewai.tasks.task_output import TaskOutput
def test_task_without_guardrail():
    """Check that a Task with no guardrail still yields a plain TaskOutput.

    Backward-compatibility check: the string returned by the agent must
    appear unchanged on ``raw`` of the returned ``TaskOutput``.
    """
    # Stand-in agent exposing only the attributes configured here.
    mock_agent = Mock(role="test_agent", crew=None)
    mock_agent.execute_task.return_value = "test result"

    plain_task = Task(description="Test task", expected_output="Output")

    output = plain_task.execute_sync(agent=mock_agent)

    assert isinstance(output, TaskOutput)
    assert output.raw == "test result"
def test_task_with_successful_guardrail():
    """Check that a passing guardrail's transformed value becomes the output."""

    def uppercase_guardrail(result: TaskOutput):
        # Report success and hand back the upper-cased raw text.
        shouted = result.raw.upper()
        return (True, shouted)

    # Stand-in agent exposing only the attributes configured here.
    mock_agent = Mock(role="test_agent", crew=None)
    mock_agent.execute_task.return_value = "test result"

    task_kwargs = {
        "description": "Test task",
        "expected_output": "Output",
        "guardrail": uppercase_guardrail,
    }
    guarded_task = Task(**task_kwargs)

    output = guarded_task.execute_sync(agent=mock_agent)

    assert isinstance(output, TaskOutput)
    assert output.raw == "TEST RESULT"
def test_task_with_failing_guardrail():
    """Test that an always-failing guardrail ends in an exception after one retry.

    The guardrail rejects every result, so with ``max_retries=1`` the test
    expects ``execute_sync`` to raise an exception mentioning the guardrail
    validation failure, and ``task.retry_count`` to be 1 afterwards.
    """

    def guardrail(result: TaskOutput):
        # Unconditionally reject, regardless of what the agent produced.
        return (False, "Invalid format")

    agent = Mock()
    agent.role = "test_agent"
    # Two queued return values, consumed one per agent call. The guardrail
    # rejects both, so neither attempt "succeeds" -- presumably one value is
    # used by the initial run and one by the single retry.
    agent.execute_task.side_effect = ["bad result", "good result"]
    agent.crew = None

    task = Task(
        description="Test task",
        expected_output="Output",
        guardrail=guardrail,
        max_retries=1,
    )

    with pytest.raises(Exception) as exc_info:
        task.execute_sync(agent=agent)

    # Checked after the ``with`` block: statements placed inside it after the
    # raising call would never run.
    assert "Task failed guardrail validation" in str(exc_info.value)
    assert task.retry_count == 1
def test_task_with_guardrail_retries():
    """Check that an always-failing guardrail stops at the configured retry limit.

    With ``max_retries=2`` the raised exception must mention the retry count
    and the guardrail's own error text, and ``retry_count`` must equal 2.
    """

    def always_reject(result: TaskOutput):
        # Unconditionally reject with a fixed error message.
        return (False, "Invalid format")

    # Stand-in agent that returns the same value on every call.
    mock_agent = Mock(role="test_agent", crew=None)
    mock_agent.execute_task.return_value = "bad result"

    task_kwargs = {
        "description": "Test task",
        "expected_output": "Output",
        "guardrail": always_reject,
        "max_retries": 2,
    }
    guarded_task = Task(**task_kwargs)

    with pytest.raises(Exception) as exc_info:
        guarded_task.execute_sync(agent=mock_agent)

    failure_text = str(exc_info.value)
    assert guarded_task.retry_count == 2
    assert "Task failed guardrail validation after 2 retries" in failure_text
    assert "Invalid format" in failure_text
def test_guardrail_error_in_context():
    """Check that the guardrail's error text appears in the final exception.

    NOTE(review): only the exception message is asserted here; the
    ``context`` argument received by the agent on retry is not inspected.
    """

    def always_reject(result: TaskOutput):
        # Unconditionally reject with a fixed error message.
        return (False, "Expected JSON, got string")

    mock_agent = Mock(role="test_agent", crew=None)

    guarded_task = Task(
        description="Test task",
        expected_output="Output",
        guardrail=always_reject,
        max_retries=1,
    )

    # The first agent call yields "invalid"; every later call falls through
    # to the JSON string once the iterator is exhausted.
    scripted_replies = iter(["invalid"])

    def execute_task(task, context, tools):
        return next(scripted_replies, '{"valid": "json"}')

    mock_agent.execute_task.side_effect = execute_task

    with pytest.raises(Exception) as exc_info:
        guarded_task.execute_sync(agent=mock_agent)

    failure_text = str(exc_info.value)
    assert "Task failed guardrail validation" in failure_text
    assert "Expected JSON, got string" in failure_text