From 52862b4f0f7cae759c7bbf17100a2836e83e2a3a Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Fri, 19 Dec 2025 02:23:57 +0000
Subject: [PATCH] fix: handle ValidationError in guardrail retry loop (fixes #4126)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a Task has both output_pydantic and guardrails configured and the
agent produces invalid JSON that fails Pydantic validation during a
retry, the system crashes instead of continuing to the next retry
attempt.

This fix wraps the _export_output call in a try-except block that
catches ValidationError and ConverterError. When either exception is
raised, pydantic_output and json_output are set to None, allowing the
retry loop to continue and the guardrail to check the raw output.

Changes:
- Import ValidationError from pydantic and ConverterError from converter
- Wrap _export_output calls in try-except in _invoke_guardrail_function
- Apply the same fix to the async version _ainvoke_guardrail_function
- Add 3 regression tests covering the issue scenario

Co-Authored-By: João
---
 lib/crewai/src/crewai/task.py            |  19 ++-
 lib/crewai/tests/test_task_guardrails.py | 152 +++++++++++++++++++++++
 2 files changed, 168 insertions(+), 3 deletions(-)

diff --git a/lib/crewai/src/crewai/task.py b/lib/crewai/src/crewai/task.py
index 13d30b564..98223061a 100644
--- a/lib/crewai/src/crewai/task.py
+++ b/lib/crewai/src/crewai/task.py
@@ -24,6 +24,7 @@ from pydantic import (
     BaseModel,
     Field,
     PrivateAttr,
+    ValidationError,
     field_validator,
     model_validator,
 )
@@ -43,7 +44,7 @@ from crewai.tasks.task_output import TaskOutput
 from crewai.tools.base_tool import BaseTool
 from crewai.utilities.config import process_config
 from crewai.utilities.constants import NOT_SPECIFIED, _NotSpecified
-from crewai.utilities.converter import Converter, convert_to_model
+from crewai.utilities.converter import Converter, ConverterError, convert_to_model
 from crewai.utilities.guardrail import (
     process_guardrail,
 )
@@ -1044,7 +1045,13 @@ Follow these guidelines:
                 tools=tools,
             )
 
-            pydantic_output, json_output = self._export_output(result)
+            try:
+                pydantic_output, json_output = self._export_output(result)
+            except (ValidationError, ConverterError):
+                # If export fails due to invalid output format, set both
+                # outputs to None and let the guardrail check the raw output
+                pydantic_output, json_output = None, None
+
             task_output = TaskOutput(
                 name=self.name or self.description,
                 description=self.description,
@@ -1140,7 +1147,13 @@ Follow these guidelines:
                 tools=tools,
             )
 
-            pydantic_output, json_output = self._export_output(result)
+            try:
+                pydantic_output, json_output = self._export_output(result)
+            except (ValidationError, ConverterError):
+                # If export fails due to invalid output format, set both
+                # outputs to None and let the guardrail check the raw output
+                pydantic_output, json_output = None, None
+
             task_output = TaskOutput(
                 name=self.name or self.description,
                 description=self.description,
diff --git a/lib/crewai/tests/test_task_guardrails.py b/lib/crewai/tests/test_task_guardrails.py
index 7ceaf847b..5ffdc7125 100644
--- a/lib/crewai/tests/test_task_guardrails.py
+++ b/lib/crewai/tests/test_task_guardrails.py
@@ -752,3 +752,155 @@ def test_per_guardrail_independent_retry_tracking():
     assert call_counts["g3"] == 1
 
     assert "G3(1)" in result.raw
+
+
+def test_guardrail_retries_with_invalid_pydantic_output():
+    """Test that guardrail retries work when agent produces invalid pydantic output.
+
+    This test covers the bug reported in issue #4126 where guardrail_max_retries
+    logic was broken by an unhandled ValidationError in _invoke_guardrail_function.
+    When the agent produces invalid JSON that fails Pydantic validation, the
+    system should continue retrying instead of crashing.
+    """
+    from pydantic import BaseModel, Field
+
+    class OutputModel(BaseModel):
+        title: str = Field(description="The title")
+        content: str = Field(description="The content")
+
+    call_count = 0
+
+    def mock_execute_task(*args, **kwargs):
+        nonlocal call_count
+        call_count += 1
+        if call_count == 1:
+            return "invalid json that will fail pydantic validation"
+        elif call_count == 2:
+            return "still invalid { broken json"
+        else:
+            return '{"title": "Valid Title", "content": "Valid Content"}'
+
+    def fail_until_third_call_guardrail(result: TaskOutput) -> tuple[bool, str]:
+        if call_count < 3:
+            return (False, "Output not valid yet")
+        return (True, result.raw)
+
+    agent = Mock()
+    agent.role = "test_agent"
+    agent.execute_task = mock_execute_task
+    agent.crew = None
+    agent.last_messages = []
+
+    task = create_smart_task(
+        description="Test pydantic validation during guardrail retries",
+        expected_output="Valid structured output",
+        guardrail=fail_until_third_call_guardrail,
+        output_pydantic=OutputModel,
+        guardrail_max_retries=3,
+    )
+
+    result = task.execute_sync(agent=agent)
+
+    assert call_count == 3
+    assert result.pydantic is not None
+    assert result.pydantic.title == "Valid Title"
+    assert result.pydantic.content == "Valid Content"
+
+
+def test_guardrail_max_retries_exhausted_with_invalid_pydantic():
+    """Test that max retries are properly exhausted even with Pydantic validation errors.
+
+    This ensures that when the agent consistently produces invalid output that
+    fails Pydantic validation, the retry loop continues until max_retries is
+    exhausted, rather than crashing on the first validation error.
+    """
+    from pydantic import BaseModel, Field
+
+    class StrictModel(BaseModel):
+        required_field: str = Field(description="A required field")
+
+    call_count = 0
+
+    def mock_execute_task(*args, **kwargs):
+        nonlocal call_count
+        call_count += 1
+        return "this is not valid json and will always fail"
+
+    def always_fail_guardrail(result: TaskOutput) -> tuple[bool, str]:
+        return (False, "Output is not valid")
+
+    agent = Mock()
+    agent.role = "test_agent"
+    agent.execute_task = mock_execute_task
+    agent.crew = None
+    agent.last_messages = []
+
+    task = create_smart_task(
+        description="Test max retries with invalid pydantic",
+        expected_output="Structured output",
+        guardrail=always_fail_guardrail,
+        output_pydantic=StrictModel,
+        guardrail_max_retries=2,
+    )
+
+    with pytest.raises(Exception) as exc_info:
+        task.execute_sync(agent=agent)
+
+    assert "Task failed guardrail validation after 2 retries" in str(exc_info.value)
+    assert call_count == 3
+
+
+def test_guardrail_with_pydantic_validation_error_continues_retry():
+    """Test that a pydantic ValidationError during retry doesn't crash the loop.
+
+    This is a regression test for issue #4126. The bug was that when
+    _export_output raised a ValidationError during the guardrail retry loop,
+    it would crash instead of continuing to the next retry attempt.
+ """ + from pydantic import BaseModel, Field + + class TestModel(BaseModel): + value: int = Field(description="An integer value") + + execution_results = [ + "not json", + '{"value": "not_an_int"}', + '{"value": 42}', + ] + call_index = 0 + + def mock_execute_task(*args, **kwargs): + nonlocal call_index + result = execution_results[call_index] + call_index += 1 + return result + + guardrail_calls = 0 + + def counting_guardrail(result: TaskOutput) -> tuple[bool, str]: + nonlocal guardrail_calls + guardrail_calls += 1 + if guardrail_calls < 3: + return (False, f"Retry attempt {guardrail_calls}") + return (True, result.raw) + + agent = Mock() + agent.role = "test_agent" + agent.execute_task = mock_execute_task + agent.crew = None + agent.last_messages = [] + + task = create_smart_task( + description="Test ValidationError handling in retry loop", + expected_output="Integer output", + guardrail=counting_guardrail, + output_pydantic=TestModel, + guardrail_max_retries=3, + ) + + result = task.execute_sync(agent=agent) + + assert call_index == 3 + assert guardrail_calls == 3 + assert result.pydantic is not None + assert result.pydantic.value == 42