From b7cb0186bd7f1fbf6e30a1a05ad422dea10235c6 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Sat, 19 Jul 2025 22:56:22 +0000
Subject: [PATCH] Fix LLMGuardrailResult JSON parsing with trailing characters

- Extract robust JSON cleaning logic into shared clean_json_from_text() function
- Update LiteAgent to use clean_json_from_text() before model_validate_json()
- Add comprehensive test cases for JSON with trailing characters, markdown formatting, and prefixes
- Fixes GitHub issue #3191 where valid JSON failed to parse due to trailing text
- Maintains backward compatibility with existing JSON parsing behavior

Co-Authored-By: João
---
 src/crewai/lite_agent.py                      |   7 +-
 .../utilities/crew_pydantic_output_parser.py  |  32 +++---
 tests/test_json_parsing_fix.py                |  97 +++++++++++++++++
 tests/test_lite_agent.py                      | 100 +++++++++++++++++-
 tests/test_task_guardrails.py                 |  75 +++++++++++++
 5 files changed, 292 insertions(+), 19 deletions(-)
 create mode 100644 tests/test_json_parsing_fix.py

diff --git a/src/crewai/lite_agent.py b/src/crewai/lite_agent.py
index e14f4576f..3f0f51aa3 100644
--- a/src/crewai/lite_agent.py
+++ b/src/crewai/lite_agent.py
@@ -62,6 +62,7 @@ from crewai.utilities.agent_utils import (
     render_text_description_and_args,
 )
 from crewai.utilities.converter import generate_model_description
+from crewai.utilities.crew_pydantic_output_parser import clean_json_from_text
 from crewai.utilities.events.agent_events import (
     AgentLogsExecutionEvent,
     LiteAgentExecutionCompletedEvent,
@@ -355,8 +356,8 @@ class LiteAgent(FlowTrackable, BaseModel):
         formatted_result: Optional[BaseModel] = None
         if self.response_format:
             try:
-                # Cast to BaseModel to ensure type safety
-                result = self.response_format.model_validate_json(agent_finish.output)
+                cleaned_output = clean_json_from_text(agent_finish.output)
+                result = self.response_format.model_validate_json(cleaned_output)
                 if isinstance(result, BaseModel):
                     formatted_result = result
             except Exception as e:
@@ -622,4 +623,4 @@ class LiteAgent(FlowTrackable, BaseModel):
 
     def _append_message(self, text: str, role: str = "assistant") -> None:
         """Append a message to the message list with the given role."""
-        self._messages.append(format_message_for_llm(text, role=role))
\ No newline at end of file
+        self._messages.append(format_message_for_llm(text, role=role))
diff --git a/src/crewai/utilities/crew_pydantic_output_parser.py b/src/crewai/utilities/crew_pydantic_output_parser.py
index d0dbfae06..a5fc9c7d2 100644
--- a/src/crewai/utilities/crew_pydantic_output_parser.py
+++ b/src/crewai/utilities/crew_pydantic_output_parser.py
@@ -8,6 +8,22 @@ from crewai.agents.parser import OutputParserException
 
 """Parser for converting text outputs into Pydantic models."""
 
+
+def clean_json_from_text(text: str) -> str:
+    """Extract and clean JSON from text that may contain markdown or trailing characters."""
+    text = text.replace("```", "").replace("json", "")
+    json_pattern = r"\{(?:[^{}]|(?R))*\}"
+    matches = regex.finditer(json_pattern, text)
+
+    for match in matches:
+        try:
+            json_obj = json.loads(match.group())
+            json_obj = json.dumps(json_obj)
+            return str(json_obj)
+        except json.JSONDecodeError:
+            continue
+    return text
+
 class CrewPydanticOutputParser:
     """Parses text outputs into specified Pydantic models."""
 
@@ -30,18 +46,4 @@
         raise OutputParserException(error=msg)
 
     def _transform_in_valid_json(self, text) -> str:
-        text = text.replace("```", "").replace("json", "")
-        json_pattern = r"\{(?:[^{}]|(?R))*\}"
-        matches = regex.finditer(json_pattern, text)
-
-        for match in matches:
-            try:
-                # Attempt to parse the matched string as JSON
-                json_obj = json.loads(match.group())
-                # Return the first successfully parsed JSON object
-                json_obj = json.dumps(json_obj)
-                return str(json_obj)
-            except json.JSONDecodeError:
-                # If parsing fails, skip to the next match
-                continue
-        return text
+        return clean_json_from_text(text)
diff --git a/tests/test_json_parsing_fix.py b/tests/test_json_parsing_fix.py
new file mode 100644
index 000000000..32e7abeee
--- /dev/null
+++ b/tests/test_json_parsing_fix.py
@@ -0,0 +1,97 @@
+import pytest
+from pydantic import BaseModel, Field
+from crewai.utilities.crew_pydantic_output_parser import clean_json_from_text
+
+
+class TestOutput(BaseModel):
+    summary: str = Field(description="A brief summary")
+    confidence: int = Field(description="Confidence level from 1-100")
+
+
+def test_clean_json_from_text_with_trailing_characters():
+    """Test that clean_json_from_text handles trailing characters correctly."""
+    text_with_trailing = '''{"summary": "Test summary", "confidence": 85}
+
+Additional text after JSON that should be ignored.
+Final Answer: This text should also be ignored.'''
+
+    cleaned = clean_json_from_text(text_with_trailing)
+    expected = '{"summary": "Test summary", "confidence": 85}'
+    assert cleaned == expected
+
+
+def test_clean_json_from_text_with_markdown():
+    """Test that clean_json_from_text handles markdown formatting correctly."""
+    text_with_markdown = '''```json
+{"summary": "Test summary with markdown", "confidence": 90}
+```'''
+
+    cleaned = clean_json_from_text(text_with_markdown)
+    expected = '{"summary": "Test summary with markdown", "confidence": 90}'
+    assert cleaned == expected
+
+
+def test_clean_json_from_text_with_prefix():
+    """Test that clean_json_from_text handles text prefix correctly."""
+    text_with_prefix = '''Final Answer: {"summary": "Test summary with prefix", "confidence": 95}'''
+
+    cleaned = clean_json_from_text(text_with_prefix)
+    expected = '{"summary": "Test summary with prefix", "confidence": 95}'
+    assert cleaned == expected
+
+
+def test_clean_json_from_text_pure_json():
+    """Test that clean_json_from_text handles pure JSON correctly."""
+    pure_json = '{"summary": "Pure JSON", "confidence": 100}'
+
+    cleaned = clean_json_from_text(pure_json)
+    assert cleaned == pure_json
+
+
+def test_clean_json_from_text_no_json():
+    """Test that clean_json_from_text returns original text when no JSON found."""
+    no_json_text = "This is just plain text with no JSON"
+
+    cleaned = clean_json_from_text(no_json_text)
+    assert cleaned == no_json_text
+
+
+def test_clean_json_from_text_invalid_json():
+    """Test that clean_json_from_text handles invalid JSON gracefully."""
+    invalid_json = '{"summary": "Invalid JSON", "confidence":}'
+
+    cleaned = clean_json_from_text(invalid_json)
+    assert cleaned == invalid_json
+
+
+def test_clean_json_from_text_multiple_json_objects():
+    """Test that clean_json_from_text returns the first valid JSON object."""
+    multiple_json = '''{"summary": "First JSON", "confidence": 80}
+
+Some text in between. 
+ +{"summary": "Second JSON", "confidence": 90}''' + + cleaned = clean_json_from_text(multiple_json) + expected = '{"summary": "First JSON", "confidence": 80}' + assert cleaned == expected + + +def test_clean_json_from_text_nested_json(): + """Test that clean_json_from_text handles nested JSON correctly.""" + nested_json = '''{"summary": "Nested test", "details": {"score": 95, "category": "A"}, "confidence": 85}''' + + cleaned = clean_json_from_text(nested_json) + assert cleaned == nested_json + + +def test_clean_json_from_text_with_complex_trailing(): + """Test the exact scenario from GitHub issue #3191.""" + github_issue_text = '''{"valid": true, "feedback": null} + +Agent failed to reach a final answer. This is likely a bug - please report it. +Error details: maximum recursion depth exceeded in comparison''' + + cleaned = clean_json_from_text(github_issue_text) + expected = '{"valid": true, "feedback": null}' + assert cleaned == expected diff --git a/tests/test_lite_agent.py b/tests/test_lite_agent.py index b495cb800..ab58b4672 100644 --- a/tests/test_lite_agent.py +++ b/tests/test_lite_agent.py @@ -492,4 +492,102 @@ def test_lite_agent_with_invalid_llm(): backstory="Test backstory", llm="invalid-model" ) - assert "Expected LLM instance of type BaseLLM" in str(exc_info.value) \ No newline at end of file + assert "Expected LLM instance of type BaseLLM" in str(exc_info.value) + + +def test_lite_agent_structured_output_with_trailing_characters(): + """Test that LiteAgent can handle JSON responses with trailing characters.""" + from unittest.mock import patch + + class SimpleOutput(BaseModel): + summary: str = Field(description="A brief summary") + confidence: int = Field(description="Confidence level from 1-100") + + mock_response_with_trailing = '''{"summary": "Test summary", "confidence": 85} + +Additional text after JSON that should be ignored. 
+Final Answer: This text should also be ignored.''' + + with patch('crewai.lite_agent.get_llm_response') as mock_llm: + mock_llm.return_value = mock_response_with_trailing + + agent = LiteAgent( + role="Test Agent", + goal="Test goal", + backstory="Test backstory", + llm=LLM(model="gpt-4o-mini"), + ) + + result = agent.kickoff( + "Test message", + response_format=SimpleOutput + ) + + assert result.pydantic is not None + assert isinstance(result.pydantic, SimpleOutput) + assert result.pydantic.summary == "Test summary" + assert result.pydantic.confidence == 85 + + +def test_lite_agent_structured_output_with_markdown(): + """Test that LiteAgent can handle JSON responses wrapped in markdown.""" + from unittest.mock import patch + + class SimpleOutput(BaseModel): + summary: str = Field(description="A brief summary") + confidence: int = Field(description="Confidence level from 1-100") + + mock_response_with_markdown = '''```json +{"summary": "Test summary with markdown", "confidence": 90} +```''' + + with patch('crewai.lite_agent.get_llm_response') as mock_llm: + mock_llm.return_value = mock_response_with_markdown + + agent = LiteAgent( + role="Test Agent", + goal="Test goal", + backstory="Test backstory", + llm=LLM(model="gpt-4o-mini"), + ) + + result = agent.kickoff( + "Test message", + response_format=SimpleOutput + ) + + assert result.pydantic is not None + assert isinstance(result.pydantic, SimpleOutput) + assert result.pydantic.summary == "Test summary with markdown" + assert result.pydantic.confidence == 90 + + +def test_lite_agent_structured_output_with_final_answer_prefix(): + """Test that LiteAgent can handle JSON responses with Final Answer prefix.""" + from unittest.mock import patch + + class SimpleOutput(BaseModel): + summary: str = Field(description="A brief summary") + confidence: int = Field(description="Confidence level from 1-100") + + mock_response_with_prefix = '''Final Answer: {"summary": "Test summary with prefix", "confidence": 95}''' + + with patch('crewai.lite_agent.get_llm_response') as mock_llm: + mock_llm.return_value = mock_response_with_prefix + + agent = LiteAgent( + role="Test Agent", + goal="Test goal", + backstory="Test backstory", + llm=LLM(model="gpt-4o-mini"), + ) + + result = agent.kickoff( + "Test message", + response_format=SimpleOutput + ) + + assert result.pydantic is not None + assert isinstance(result.pydantic, SimpleOutput) + assert result.pydantic.summary == "Test summary with prefix" + assert result.pydantic.confidence == 95 diff --git a/tests/test_task_guardrails.py b/tests/test_task_guardrails.py index 901b962b9..457f91a5e 100644 --- a/tests/test_task_guardrails.py +++ b/tests/test_task_guardrails.py @@ -302,3 +302,78 @@ def test_hallucination_guardrail_description_in_events(): event = LLMGuardrailStartedEvent(guardrail=guardrail, retry_count=0) assert event.guardrail == "HallucinationGuardrail (no-op)" + + +def test_llm_guardrail_with_trailing_characters(): + """Test that LLMGuardrail can handle responses with trailing characters.""" + from unittest.mock import patch + + mock_response_with_trailing = '''{"valid": true, "feedback": null} + +Some additional text that should be ignored. 
+More trailing content.''' + + with patch('crewai.Agent.kickoff') as mock_kickoff: + from crewai.agent import LiteAgentOutput + from crewai.tasks.llm_guardrail import LLMGuardrailResult + + mock_output = LiteAgentOutput( + raw=mock_response_with_trailing, + pydantic=LLMGuardrailResult(valid=True, feedback=None), + agent_role="Guardrail Agent", + usage_metrics=None + ) + mock_kickoff.return_value = mock_output + + guardrail = LLMGuardrail( + description="Test guardrail", + llm=LLM(model="gpt-4o-mini") + ) + + task_output = TaskOutput( + raw="Test task output", + description="Test task", + expected_output="Output", + agent="Test Agent", + ) + + result = guardrail(task_output) + assert result[0] is True + assert result[1] == "Test task output" + + +def test_llm_guardrail_with_markdown_formatting(): + """Test that LLMGuardrail can handle responses with markdown formatting.""" + from unittest.mock import patch + + mock_response_with_markdown = '''```json +{"valid": false, "feedback": "The output does not meet the requirements"} +```''' + + with patch('crewai.Agent.kickoff') as mock_kickoff: + from crewai.agent import LiteAgentOutput + from crewai.tasks.llm_guardrail import LLMGuardrailResult + + mock_output = LiteAgentOutput( + raw=mock_response_with_markdown, + pydantic=LLMGuardrailResult(valid=False, feedback="The output does not meet the requirements"), + agent_role="Guardrail Agent", + usage_metrics=None + ) + mock_kickoff.return_value = mock_output + + guardrail = LLMGuardrail( + description="Test guardrail", + llm=LLM(model="gpt-4o-mini") + ) + + task_output = TaskOutput( + raw="Test task output", + description="Test task", + expected_output="Output", + agent="Test Agent", + ) + + result = guardrail(task_output) + assert result[0] is False + assert result[1] == "The output does not meet the requirements"
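
A quick way to exercise the new helper by hand (a minimal sketch, assuming this patch is applied; the import path comes from the diff above, and the sample strings are illustrative only):

    from crewai.utilities.crew_pydantic_output_parser import clean_json_from_text

    # Trailing text after the JSON object (the issue #3191 failure mode) is dropped.
    raw = '{"valid": true, "feedback": null}\n\nAgent failed to reach a final answer.'
    print(clean_json_from_text(raw))     # {"valid": true, "feedback": null}

    # Markdown code fences are stripped before the JSON object is matched.
    fenced = '```json\n{"valid": false, "feedback": "Too short"}\n```'
    print(clean_json_from_text(fenced))  # {"valid": false, "feedback": "Too short"}

Note that the helper removes every literal ``` and "json" substring before matching, behavior carried over unchanged from the old _transform_in_valid_json, so it is a best-effort recovery heuristic rather than a strict parser.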