Mirror of https://github.com/crewAIInc/crewAI.git, synced 2026-01-08 15:48:29 +00:00

Compare commits: devin/1766 ... devin/1766 (2 commits)

| Author | SHA1 | Date |
|---|---|---|
|  | 52862b4f0f |  |
|  | dc63bc2319 |  |
@@ -12,6 +12,7 @@ from rich.console import Console
 from crewai.cli import git
 from crewai.cli.command import BaseCommand, PlusAPIMixin
 from crewai.cli.config import Settings
+from crewai.cli.constants import DEFAULT_CREWAI_ENTERPRISE_URL
 from crewai.cli.utils import (
     build_env_with_tool_repository_credentials,
     extract_available_exports,
@@ -131,10 +132,13 @@ class ToolCommand(BaseCommand, PlusAPIMixin):
         self._validate_response(publish_response)

         published_handle = publish_response.json()["handle"]
+        settings = Settings()
+        base_url = settings.enterprise_base_url or DEFAULT_CREWAI_ENTERPRISE_URL

         console.print(
             f"Successfully published `{published_handle}` ({project_version}).\n\n"
+            "⚠️ Security checks are running in the background. Your tool will be available once these are complete.\n"
-            f"You can monitor the status or access your tool here:\nhttps://app.crewai.com/crewai_plus/tools/{published_handle}",
+            f"You can monitor the status or access your tool here:\n{base_url}/crewai_plus/tools/{published_handle}",
             style="bold green",
         )
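For context, a minimal sketch of the URL resolution this hunk switches to. It reuses the `Settings` and `DEFAULT_CREWAI_ENTERPRISE_URL` imports shown in the diff; the `published_tool_url` helper name is only for illustration and does not exist in the codebase.

```python
# Illustrative sketch (not part of the diff): resolve the link base the same way
# the hunk above does, falling back to the default enterprise URL when no
# enterprise_base_url is configured in Settings.
from crewai.cli.config import Settings
from crewai.cli.constants import DEFAULT_CREWAI_ENTERPRISE_URL


def published_tool_url(published_handle: str) -> str:
    # Hypothetical helper mirroring the inline logic in the ToolCommand hunk above.
    settings = Settings()
    base_url = settings.enterprise_base_url or DEFAULT_CREWAI_ENTERPRISE_URL
    return f"{base_url}/crewai_plus/tools/{published_handle}"
```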
@@ -9,6 +9,8 @@ from rich.console import Console
 from rich.panel import Panel

 from crewai.cli.authentication.token import AuthError, get_auth_token
+from crewai.cli.config import Settings
+from crewai.cli.constants import DEFAULT_CREWAI_ENTERPRISE_URL
 from crewai.cli.plus_api import PlusAPI
 from crewai.cli.version import get_crewai_version
 from crewai.events.listeners.tracing.types import TraceEvent
@@ -16,7 +18,6 @@ from crewai.events.listeners.tracing.utils import (
     is_tracing_enabled_in_context,
     should_auto_collect_first_time_traces,
 )
-from crewai.utilities.constants import CREWAI_BASE_URL


 logger = getLogger(__name__)
@@ -326,10 +327,12 @@ class TraceBatchManager:
         if response.status_code == 200:
             access_code = response.json().get("access_code", None)
             console = Console()
+            settings = Settings()
+            base_url = settings.enterprise_base_url or DEFAULT_CREWAI_ENTERPRISE_URL
             return_link = (
-                f"{CREWAI_BASE_URL}/crewai_plus/trace_batches/{self.trace_batch_id}"
+                f"{base_url}/crewai_plus/trace_batches/{self.trace_batch_id}"
                 if not self.is_current_batch_ephemeral and access_code is None
-                else f"{CREWAI_BASE_URL}/crewai_plus/ephemeral_trace_batches/{self.trace_batch_id}?access_code={access_code}"
+                else f"{base_url}/crewai_plus/ephemeral_trace_batches/{self.trace_batch_id}?access_code={access_code}"
             )

             if self.is_current_batch_ephemeral:
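As a rough illustration of the link selection above, the sketch below restates the `return_link` ternary as a standalone function. `trace_link` and its parameters are hypothetical names; the ephemeral/access-code semantics are taken from the hunk.

```python
# Illustrative sketch (not part of the diff): choose between the persistent and
# ephemeral trace-batch links, as in the TraceBatchManager hunk above.
from crewai.cli.config import Settings
from crewai.cli.constants import DEFAULT_CREWAI_ENTERPRISE_URL


def trace_link(trace_batch_id: str, is_ephemeral: bool, access_code: str | None) -> str:
    # Hypothetical helper mirroring the return_link expression above.
    base_url = Settings().enterprise_base_url or DEFAULT_CREWAI_ENTERPRISE_URL
    if not is_ephemeral and access_code is None:
        return f"{base_url}/crewai_plus/trace_batches/{trace_batch_id}"
    return f"{base_url}/crewai_plus/ephemeral_trace_batches/{trace_batch_id}?access_code={access_code}"
```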
@@ -24,6 +24,7 @@ from pydantic import (
     BaseModel,
     Field,
     PrivateAttr,
+    ValidationError,
     field_validator,
     model_validator,
 )
@@ -43,7 +44,7 @@ from crewai.tasks.task_output import TaskOutput
 from crewai.tools.base_tool import BaseTool
 from crewai.utilities.config import process_config
 from crewai.utilities.constants import NOT_SPECIFIED, _NotSpecified
-from crewai.utilities.converter import Converter, convert_to_model
+from crewai.utilities.converter import Converter, ConverterError, convert_to_model
 from crewai.utilities.guardrail import (
     process_guardrail,
 )
@@ -1044,7 +1045,13 @@ Follow these guidelines:
                 tools=tools,
             )

-            pydantic_output, json_output = self._export_output(result)
+            try:
+                pydantic_output, json_output = self._export_output(result)
+            except (ValidationError, ConverterError):
+                # If export fails due to invalid output format, set outputs to None
+                # and let the next iteration's guardrail check handle it
+                pydantic_output, json_output = None, None
+
             task_output = TaskOutput(
                 name=self.name or self.description,
                 description=self.description,
@@ -1140,7 +1147,13 @@ Follow these guidelines:
                 tools=tools,
             )

-            pydantic_output, json_output = self._export_output(result)
+            try:
+                pydantic_output, json_output = self._export_output(result)
+            except (ValidationError, ConverterError):
+                # If export fails due to invalid output format, set outputs to None
+                # and let the next iteration's guardrail check handle it
+                pydantic_output, json_output = None, None
+
             task_output = TaskOutput(
                 name=self.name or self.description,
                 description=self.description,
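The pattern these two task.py hunks introduce, reduced to a self-contained sketch: a validation failure on one attempt is swallowed so a later attempt can still succeed. The retry loop, the `parse_structured` helper, and the `Report` model below are hypothetical stand-ins for crewAI's internals; only the idea of catching the validation error and deferring to the next attempt is taken from the diff.

```python
# Illustrative sketch (not part of the diff): tolerate a validation failure on one
# attempt and let a retry produce a valid result instead of crashing.
from pydantic import BaseModel, ValidationError


class Report(BaseModel):  # hypothetical output schema
    title: str


def parse_structured(raw: str) -> Report | None:
    # Stand-in for Task._export_output: return None on invalid output so the
    # guardrail/retry logic can decide what to do next.
    try:
        return Report.model_validate_json(raw)
    except ValidationError:
        return None


attempts = ["not json at all", '{"title": "ok"}']  # first attempt invalid, second valid
parsed = None
for raw in attempts:
    parsed = parse_structured(raw)
    if parsed is not None:
        break

assert parsed is not None and parsed.title == "ok"
```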
@@ -30,4 +30,3 @@ NOT_SPECIFIED: Final[
         "allows us to distinguish between 'not passed at all' and 'explicitly passed None' or '[]'.",
     ]
 ] = _NotSpecified()
-CREWAI_BASE_URL: Final[str] = "https://app.crewai.com"
@@ -752,3 +752,155 @@ def test_per_guardrail_independent_retry_tracking():
    assert call_counts["g3"] == 1

    assert "G3(1)" in result.raw


def test_guardrail_retries_with_invalid_pydantic_output():
    """Test that guardrail retries work when the agent produces invalid pydantic output.

    This test covers the bug reported in issue #4126, where the guardrail_max_retries
    logic was broken due to an unhandled ValidationError in _invoke_guardrail_function.
    When the agent produces invalid JSON that fails Pydantic validation, the system
    should continue retrying instead of crashing.
    """
    from pydantic import BaseModel, Field

    class OutputModel(BaseModel):
        title: str = Field(description="The title")
        content: str = Field(description="The content")

    call_count = 0

    def mock_execute_task(*args, **kwargs):
        nonlocal call_count
        call_count += 1
        if call_count == 1:
            return "invalid json that will fail pydantic validation"
        elif call_count == 2:
            return "still invalid { broken json"
        else:
            return '{"title": "Valid Title", "content": "Valid Content"}'

    def always_fail_guardrail(result: TaskOutput) -> tuple[bool, str]:
        if call_count < 3:
            return (False, "Output not valid yet")
        return (True, result.raw)

    agent = Mock()
    agent.role = "test_agent"
    agent.execute_task = mock_execute_task
    agent.crew = None
    agent.last_messages = []

    task = create_smart_task(
        description="Test pydantic validation during guardrail retries",
        expected_output="Valid structured output",
        guardrail=always_fail_guardrail,
        output_pydantic=OutputModel,
        guardrail_max_retries=3,
    )

    result = task.execute_sync(agent=agent)

    assert call_count == 3
    assert result.pydantic is not None
    assert result.pydantic.title == "Valid Title"
    assert result.pydantic.content == "Valid Content"


def test_guardrail_max_retries_exhausted_with_invalid_pydantic():
    """Test that max retries are properly exhausted even with pydantic validation errors.

    This ensures that when the agent consistently produces invalid output that fails
    pydantic validation, the retry loop continues until max_retries is exhausted,
    rather than crashing on the first validation error.
    """
    from pydantic import BaseModel, Field

    class StrictModel(BaseModel):
        required_field: str = Field(description="A required field")

    call_count = 0

    def mock_execute_task(*args, **kwargs):
        nonlocal call_count
        call_count += 1
        return "this is not valid json and will always fail"

    def always_fail_guardrail(result: TaskOutput) -> tuple[bool, str]:
        return (False, "Output is not valid")

    agent = Mock()
    agent.role = "test_agent"
    agent.execute_task = mock_execute_task
    agent.crew = None
    agent.last_messages = []

    task = create_smart_task(
        description="Test max retries with invalid pydantic",
        expected_output="Structured output",
        guardrail=always_fail_guardrail,
        output_pydantic=StrictModel,
        guardrail_max_retries=2,
    )

    with pytest.raises(Exception) as exc_info:
        task.execute_sync(agent=agent)

    assert "Task failed guardrail validation after 2 retries" in str(exc_info.value)
    assert call_count == 3


def test_guardrail_with_pydantic_validation_error_continues_retry():
    """Test that a pydantic ValidationError during retry doesn't crash the loop.

    This is a regression test for issue #4126. The bug was that when _export_output
    raised a ValidationError during the guardrail retry loop, it would crash instead
    of continuing to the next retry attempt.
    """
    from pydantic import BaseModel, Field

    class TestModel(BaseModel):
        value: int = Field(description="An integer value")

    execution_results = [
        "not json",
        '{"value": "not_an_int"}',
        '{"value": 42}',
    ]
    call_index = 0

    def mock_execute_task(*args, **kwargs):
        nonlocal call_index
        result = execution_results[call_index]
        call_index += 1
        return result

    guardrail_calls = 0

    def counting_guardrail(result: TaskOutput) -> tuple[bool, str]:
        nonlocal guardrail_calls
        guardrail_calls += 1
        if guardrail_calls < 3:
            return (False, f"Retry attempt {guardrail_calls}")
        return (True, result.raw)

    agent = Mock()
    agent.role = "test_agent"
    agent.execute_task = mock_execute_task
    agent.crew = None
    agent.last_messages = []

    task = create_smart_task(
        description="Test ValidationError handling in retry loop",
        expected_output="Integer output",
        guardrail=counting_guardrail,
        output_pydantic=TestModel,
        guardrail_max_retries=3,
    )

    result = task.execute_sync(agent=agent)

    assert call_index == 3
    assert guardrail_calls == 3
    assert result.pydantic is not None
    assert result.pydantic.value == 42