Mirror of https://github.com/crewAIInc/crewAI.git
This commit adds the source attribute to LLM Guardrail event calls to identify the Lite Agent or Task that executed the guardrail.
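As a minimal sketch (assuming only the event-bus API exercised by the tests below), a listener could use the new `source` argument to attribute a guardrail run to the Task or Lite Agent that triggered it; the handler name and log format here are illustrative only:

```python
from crewai import Task
from crewai.events.event_bus import crewai_event_bus
from crewai.events.event_types import LLMGuardrailStartedEvent


@crewai_event_bus.on(LLMGuardrailStartedEvent)
def log_guardrail_source(source, event):
    # `source` is the Task (or Lite Agent) that ran the guardrail, so a listener
    # can attribute the event without extra bookkeeping.
    owner = (
        f"Task({source.description!r})" if isinstance(source, Task) else type(source).__name__
    )
    print(f"Guardrail {event.guardrail!r} started by {owner} (retry {event.retry_count})")
```

The tests below register handlers the same way and assert `source == task` for task-executed guardrails.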
from unittest.mock import Mock, patch

import pytest

from crewai import Agent, Task
from crewai.events.event_bus import crewai_event_bus
from crewai.events.event_types import (
    LLMGuardrailCompletedEvent,
    LLMGuardrailStartedEvent,
)
from crewai.llm import LLM
from crewai.tasks.hallucination_guardrail import HallucinationGuardrail
from crewai.tasks.llm_guardrail import LLMGuardrail
from crewai.tasks.task_output import TaskOutput


def test_task_without_guardrail():
    """Test that tasks work normally without guardrails (backward compatibility)."""
    agent = Mock()
    agent.role = "test_agent"
    agent.execute_task.return_value = "test result"
    agent.crew = None

    task = Task(description="Test task", expected_output="Output")

    result = task.execute_sync(agent=agent)
    assert isinstance(result, TaskOutput)
    assert result.raw == "test result"


def test_task_with_successful_guardrail_func():
    """Test that successful guardrail validation passes transformed result."""

    def guardrail(result: TaskOutput):
        return (True, result.raw.upper())

    agent = Mock()
    agent.role = "test_agent"
    agent.execute_task.return_value = "test result"
    agent.crew = None

    task = Task(description="Test task", expected_output="Output", guardrail=guardrail)

    result = task.execute_sync(agent=agent)
    assert isinstance(result, TaskOutput)
    assert result.raw == "TEST RESULT"


def test_task_with_failing_guardrail():
    """Test that failing guardrail triggers retry with error context."""

    def guardrail(result: TaskOutput):
        return (False, "Invalid format")

    agent = Mock()
    agent.role = "test_agent"
    agent.execute_task.side_effect = ["bad result", "good result"]
    agent.crew = None

    task = Task(
        description="Test task",
        expected_output="Output",
        guardrail=guardrail,
        guardrail_max_retries=1,
    )

    # The guardrail rejects every attempt, so the task retries once and then fails
    with pytest.raises(Exception) as exc_info:
        task.execute_sync(agent=agent)

    assert "Task failed guardrail validation" in str(exc_info.value)
    assert task.retry_count == 1


def test_task_with_guardrail_retries():
    """Test that guardrail respects max_retries configuration."""

    def guardrail(result: TaskOutput):
        return (False, "Invalid format")

    agent = Mock()
    agent.role = "test_agent"
    agent.execute_task.return_value = "bad result"
    agent.crew = None

    task = Task(
        description="Test task",
        expected_output="Output",
        guardrail=guardrail,
        guardrail_max_retries=2,
    )

    with pytest.raises(Exception) as exc_info:
        task.execute_sync(agent=agent)

    assert task.retry_count == 2
    assert "Task failed guardrail validation after 2 retries" in str(exc_info.value)
    assert "Invalid format" in str(exc_info.value)


def test_guardrail_error_in_context():
    """Test that guardrail error is passed in context for retry."""

    def guardrail(result: TaskOutput):
        return (False, "Expected JSON, got string")

    agent = Mock()
    agent.role = "test_agent"
    agent.crew = None

    task = Task(
        description="Test task",
        expected_output="Output",
        guardrail=guardrail,
        guardrail_max_retries=1,
    )

    # Mock execute_task to succeed on second attempt
    first_call = True

    def execute_task(task, context, tools):
        nonlocal first_call
        if first_call:
            first_call = False
            return "invalid"
        return '{"valid": "json"}'

    agent.execute_task.side_effect = execute_task

    with pytest.raises(Exception) as exc_info:
        task.execute_sync(agent=agent)

    assert "Task failed guardrail validation" in str(exc_info.value)
    assert "Expected JSON, got string" in str(exc_info.value)


@pytest.fixture
def sample_agent():
    return Agent(role="Test Agent", goal="Test Goal", backstory="Test Backstory")


@pytest.fixture
def task_output():
    return TaskOutput(
        raw="""
Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever
""",
        description="Test task",
        expected_output="Output",
        agent="Test Agent",
    )


@pytest.mark.vcr(filter_headers=["authorization"])
|
|
def test_task_guardrail_process_output(task_output):
|
|
guardrail = LLMGuardrail(
|
|
description="Ensure the result has less than 10 words", llm=LLM(model="gpt-4o")
|
|
)
|
|
|
|
result = guardrail(task_output)
|
|
assert result[0] is False
|
|
|
|
assert "exceeding the guardrail limit of fewer than" in result[1].lower()
|
|
|
|
guardrail = LLMGuardrail(
|
|
description="Ensure the result has less than 500 words", llm=LLM(model="gpt-4o")
|
|
)
|
|
|
|
result = guardrail(task_output)
|
|
assert result[0] is True
|
|
assert result[1] == task_output.raw
|
|
|
|
|
|
@pytest.mark.vcr(filter_headers=["authorization"])
|
|
def test_guardrail_emits_events(sample_agent):
|
|
started_guardrail = []
|
|
completed_guardrail = []
|
|
|
|
task = Task(
|
|
description="Gather information about available books on the First World War",
|
|
agent=sample_agent,
|
|
expected_output="A list of available books on the First World War",
|
|
guardrail="Ensure the authors are from Italy",
|
|
)
|
|
|
|
with crewai_event_bus.scoped_handlers():
|
|
|
|
@crewai_event_bus.on(LLMGuardrailStartedEvent)
|
|
def handle_guardrail_started(source, event):
|
|
assert source == task
|
|
started_guardrail.append(
|
|
{"guardrail": event.guardrail, "retry_count": event.retry_count}
|
|
)
|
|
|
|
@crewai_event_bus.on(LLMGuardrailCompletedEvent)
|
|
def handle_guardrail_completed(source, event):
|
|
assert source == task
|
|
completed_guardrail.append(
|
|
{
|
|
"success": event.success,
|
|
"result": event.result,
|
|
"error": event.error,
|
|
"retry_count": event.retry_count,
|
|
}
|
|
)
|
|
|
|
result = task.execute_sync(agent=sample_agent)
|
|
|
|
def custom_guardrail(result: TaskOutput):
|
|
return (True, "good result from callable function")
|
|
|
|
task = Task(
|
|
description="Test task",
|
|
expected_output="Output",
|
|
guardrail=custom_guardrail,
|
|
)
|
|
|
|
task.execute_sync(agent=sample_agent)
|
|
|
|
expected_started_events = [
|
|
{"guardrail": "Ensure the authors are from Italy", "retry_count": 0},
|
|
{"guardrail": "Ensure the authors are from Italy", "retry_count": 1},
|
|
{
|
|
"guardrail": """def custom_guardrail(result: TaskOutput):
|
|
return (True, "good result from callable function")""",
|
|
"retry_count": 0,
|
|
},
|
|
]
|
|
|
|
expected_completed_events = [
|
|
{
|
|
"success": False,
|
|
"result": None,
|
|
"error": "The task result does not comply with the guardrail because none of "
|
|
"the listed authors are from Italy. All authors mentioned are from "
|
|
"different countries, including Germany, the UK, the USA, and others, "
|
|
"which violates the requirement that authors must be Italian.",
|
|
"retry_count": 0,
|
|
},
|
|
{"success": True, "result": result.raw, "error": None, "retry_count": 1},
|
|
{
|
|
"success": True,
|
|
"result": "good result from callable function",
|
|
"error": None,
|
|
"retry_count": 0,
|
|
},
|
|
]
|
|
assert started_guardrail == expected_started_events
|
|
assert completed_guardrail == expected_completed_events
|
|
|
|
|
|
@pytest.mark.vcr(filter_headers=["authorization"])
|
|
def test_guardrail_when_an_error_occurs(sample_agent, task_output):
|
|
with (
|
|
patch(
|
|
"crewai.Agent.kickoff",
|
|
side_effect=Exception("Unexpected error"),
|
|
),
|
|
pytest.raises(
|
|
Exception,
|
|
match="Error while validating the task output: Unexpected error",
|
|
),
|
|
):
|
|
task = Task(
|
|
description="Gather information about available books on the First World War",
|
|
agent=sample_agent,
|
|
expected_output="A list of available books on the First World War",
|
|
guardrail="Ensure the authors are from Italy",
|
|
guardrail_max_retries=0,
|
|
)
|
|
task.execute_sync(agent=sample_agent)
|
|
|
|
|
|
def test_hallucination_guardrail_integration():
    """Test that HallucinationGuardrail integrates properly with the task system."""
    agent = Mock()
    agent.role = "test_agent"
    agent.execute_task.return_value = "test result"
    agent.crew = None

    mock_llm = Mock(spec=LLM)
    guardrail = HallucinationGuardrail(
        context="Test reference context for validation", llm=mock_llm, threshold=8.0
    )

    task = Task(
        description="Test task with hallucination guardrail",
        expected_output="Valid output",
        guardrail=guardrail,
    )

    result = task.execute_sync(agent=agent)
    assert isinstance(result, TaskOutput)
    assert result.raw == "test result"


def test_hallucination_guardrail_description_in_events():
    """Test that HallucinationGuardrail description appears correctly in events."""
    mock_llm = Mock(spec=LLM)
    guardrail = HallucinationGuardrail(context="Test context", llm=mock_llm)

    assert guardrail.description == "HallucinationGuardrail (no-op)"

    event = LLMGuardrailStartedEvent(guardrail=guardrail, retry_count=0)
    assert event.guardrail == "HallucinationGuardrail (no-op)"