diff --git a/src/crewai/tasks/hallucination_guardrail.py b/src/crewai/tasks/hallucination_guardrail.py
new file mode 100644
index 000000000..3079bc243
--- /dev/null
+++ b/src/crewai/tasks/hallucination_guardrail.py
@@ -0,0 +1,96 @@
+"""Hallucination Guardrail Placeholder for CrewAI.
+
+This is a no-op version of the HallucinationGuardrail for the open-source repository.
+
+Classes:
+    HallucinationGuardrail: Placeholder guardrail that validates task outputs.
+"""
+
+from typing import Any, Optional, Tuple
+
+from crewai.llm import LLM
+from crewai.tasks.task_output import TaskOutput
+from crewai.utilities.logger import Logger
+
+
+class HallucinationGuardrail:
+    """Placeholder for the HallucinationGuardrail feature.
+
+    Attributes:
+        context: The reference context that outputs would be checked against.
+        llm: The language model that would be used for evaluation.
+        threshold: Optional minimum faithfulness score that would be required to pass.
+        tool_response: Optional tool response information that would be used in evaluation.
+
+    Examples:
+        >>> # Basic usage with default verdict logic
+        >>> guardrail = HallucinationGuardrail(
+        ...     context="AI helps with various tasks including analysis and generation.",
+        ...     llm=agent.llm
+        ... )
+
+        >>> # With a custom threshold for stricter validation
+        >>> strict_guardrail = HallucinationGuardrail(
+        ...     context="Quantum computing uses qubits in superposition.",
+        ...     llm=agent.llm,
+        ...     threshold=8.0  # Would require a score >= 8 to pass in the enterprise version
+        ... )
+
+        >>> # With a tool response for additional context
+        >>> guardrail_with_tools = HallucinationGuardrail(
+        ...     context="The current weather data",
+        ...     llm=agent.llm,
+        ...     tool_response="Weather API returned: Temperature 22°C, Humidity 65%"
+        ... )
+    """
+
+    def __init__(
+        self,
+        context: str,
+        llm: LLM,
+        threshold: Optional[float] = None,
+        tool_response: str = "",
+    ):
+        """Initialize the HallucinationGuardrail placeholder.
+
+        Args:
+            context: The reference context that outputs would be checked against.
+            llm: The language model that would be used for evaluation.
+            threshold: Optional minimum faithfulness score that would be required to pass.
+            tool_response: Optional tool response information that would be used in evaluation.
+        """
+        self.context = context
+        self.llm: LLM = llm
+        self.threshold = threshold
+        self.tool_response = tool_response
+        self._logger = Logger(verbose=True)
+        self._logger.log(
+            "warning",
+            "Hallucination detection is a no-op in open source; use it for free at https://app.crewai.com\n",
+            color="red",
+        )
+
+    @property
+    def description(self) -> str:
+        """Generate a description of this guardrail for event logging."""
+        return "HallucinationGuardrail (no-op)"
+
+    def __call__(self, task_output: TaskOutput) -> Tuple[bool, Any]:
+        """Validate a task output against hallucination criteria.
+
+        In the open-source version, this method always reports the output as valid.
+
+        Args:
+            task_output: The output to be validated.
+
+        Returns:
+            A tuple containing:
+                - True
+                - The raw task output
+        """
+        self._logger.log(
+            "warning",
+            "Premium hallucination detection skipped (use it for free at https://app.crewai.com)\n",
+            color="red",
+        )
+        return True, task_output.raw
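Usage sketch (names here are illustrative and not part of this change; Task accepts guardrail= as exercised by the integration test further down, and in the open-source build the guardrail always passes the raw output through):

from crewai import Agent, Task
from crewai.tasks.hallucination_guardrail import HallucinationGuardrail

# Illustrative agent; any Agent with a configured LLM works the same way.
researcher = Agent(
    role="Researcher",
    goal="Summarize facts from the provided context",
    backstory="Careful analyst",
)

# The guardrail carries the reference context; threshold only matters in the enterprise version.
guardrail = HallucinationGuardrail(
    context="Earth orbits the Sun once every 365.25 days.",
    llm=researcher.llm,
    threshold=8.0,
)

task = Task(
    description="State how long Earth takes to orbit the Sun.",
    expected_output="One sentence grounded in the context.",
    guardrail=guardrail,  # no-op here: evaluates to (True, task_output.raw)
)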
diff --git a/src/crewai/utilities/events/llm_guardrail_events.py b/src/crewai/utilities/events/llm_guardrail_events.py
index a484c187a..01831e12c 100644
--- a/src/crewai/utilities/events/llm_guardrail_events.py
+++ b/src/crewai/utilities/events/llm_guardrail_events.py
@@ -19,10 +19,13 @@ class LLMGuardrailStartedEvent(BaseEvent):
         from inspect import getsource
 
         from crewai.tasks.llm_guardrail import LLMGuardrail
+        from crewai.tasks.hallucination_guardrail import HallucinationGuardrail
 
         super().__init__(**data)
 
-        if isinstance(self.guardrail, LLMGuardrail):
+        if isinstance(self.guardrail, LLMGuardrail) or isinstance(
+            self.guardrail, HallucinationGuardrail
+        ):
             self.guardrail = self.guardrail.description.strip()
         elif isinstance(self.guardrail, Callable):
             self.guardrail = getsource(self.guardrail).strip()
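Sketch of the effect (mirroring the event test added in tests/test_task_guardrails.py below; the mock LLM and the import of LLMGuardrailStartedEvent from crewai.utilities.events follow that test's assumptions):

from unittest.mock import Mock

from crewai.llm import LLM
from crewai.tasks.hallucination_guardrail import HallucinationGuardrail
from crewai.utilities.events import LLMGuardrailStartedEvent

guardrail = HallucinationGuardrail(context="Reference context", llm=Mock(spec=LLM))

# With this branch, the event stores the guardrail's short description string...
event = LLMGuardrailStartedEvent(guardrail=guardrail, retry_count=0)
assert event.guardrail == "HallucinationGuardrail (no-op)"

# ...rather than falling through to the Callable branch, which would attempt
# inspect.getsource() on the guardrail instance.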
"Geographic API returned: France capital is Paris", + ), + ], +) +def test_hallucination_guardrail_always_passes( + context, task_output_text, threshold, tool_response +): + """Test that the guardrail always passes regardless of configuration in open-source version.""" + mock_llm = Mock(spec=LLM) + + guardrail = HallucinationGuardrail( + context=context, llm=mock_llm, threshold=threshold, tool_response=tool_response + ) + + task_output = TaskOutput( + raw=task_output_text, + description="Test task", + expected_output="Expected output", + agent="Test Agent", + ) + + result, output = guardrail(task_output) + + assert result is True + assert output == task_output_text diff --git a/tests/test_task_guardrails.py b/tests/test_task_guardrails.py index aaac05bb7..901b962b9 100644 --- a/tests/test_task_guardrails.py +++ b/tests/test_task_guardrails.py @@ -1,9 +1,10 @@ -from unittest.mock import ANY, Mock, patch +from unittest.mock import Mock, patch import pytest from crewai import Agent, Task from crewai.llm import LLM +from crewai.tasks.hallucination_guardrail import HallucinationGuardrail from crewai.tasks.llm_guardrail import LLMGuardrail from crewai.tasks.task_output import TaskOutput from crewai.utilities.events import ( @@ -267,3 +268,37 @@ def test_guardrail_when_an_error_occurs(sample_agent, task_output): max_retries=0, ) task.execute_sync(agent=sample_agent) + + +def test_hallucination_guardrail_integration(): + """Test that HallucinationGuardrail integrates properly with the task system.""" + agent = Mock() + agent.role = "test_agent" + agent.execute_task.return_value = "test result" + agent.crew = None + + mock_llm = Mock(spec=LLM) + guardrail = HallucinationGuardrail( + context="Test reference context for validation", llm=mock_llm, threshold=8.0 + ) + + task = Task( + description="Test task with hallucination guardrail", + expected_output="Valid output", + guardrail=guardrail, + ) + + result = task.execute_sync(agent=agent) + assert isinstance(result, TaskOutput) + assert result.raw == "test result" + + +def test_hallucination_guardrail_description_in_events(): + """Test that HallucinationGuardrail description appears correctly in events.""" + mock_llm = Mock(spec=LLM) + guardrail = HallucinationGuardrail(context="Test context", llm=mock_llm) + + assert guardrail.description == "HallucinationGuardrail (no-op)" + + event = LLMGuardrailStartedEvent(guardrail=guardrail, retry_count=0) + assert event.guardrail == "HallucinationGuardrail (no-op)"