Mirror of https://github.com/crewAIInc/crewAI.git, synced 2026-01-23 23:28:15 +00:00
Fix LLMGuardrailResult JSON parsing with trailing characters
- Extract robust JSON cleaning logic into a shared clean_json_from_text() function (see the sketch below)
- Update LiteAgent to use clean_json_from_text() before model_validate_json()
- Add comprehensive test cases for JSON with trailing characters, markdown formatting, and prefixes
- Fixes GitHub issue #3191, where valid JSON failed to parse due to trailing text
- Maintains backward compatibility with existing JSON parsing behavior

Co-Authored-By: João <joao@crewai.com>
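For context, here is a minimal sketch of what a shared clean_json_from_text() helper with the behaviour the new tests exercise could look like: pick the first parseable JSON object, tolerate markdown fences and "Final Answer:"-style prefixes, and return the input unchanged when nothing parses. The function name and module path (crewai.utilities.crew_pydantic_output_parser) come from the commit itself; the internal logic shown here is an assumption, not the actual crewAI implementation.

# Illustrative sketch only; the real implementation in
# crewai.utilities.crew_pydantic_output_parser may differ.
import json
import re


def clean_json_from_text(text: str) -> str:
    """Return the first valid JSON object embedded in text, or text unchanged."""
    # Drop common markdown fences such as ```json ... ``` before scanning.
    stripped = re.sub(r"```(?:json)?", "", text).strip()

    # Try to decode a JSON object at each opening brace; the first one that
    # parses wins, and anything after its closing brace is ignored.
    decoder = json.JSONDecoder()
    for match in re.finditer(r"\{", stripped):
        try:
            _, end = decoder.raw_decode(stripped[match.start():])
            return stripped[match.start():match.start() + end]
        except json.JSONDecodeError:
            continue

    # Nothing parseable (plain text or malformed JSON): return the input as-is.
    return text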
tests/test_json_parsing_fix.py (new file, 97 lines)
@@ -0,0 +1,97 @@
import pytest
from pydantic import BaseModel, Field
from crewai.utilities.crew_pydantic_output_parser import clean_json_from_text


class TestOutput(BaseModel):
    summary: str = Field(description="A brief summary")
    confidence: int = Field(description="Confidence level from 1-100")


def test_clean_json_from_text_with_trailing_characters():
    """Test that clean_json_from_text handles trailing characters correctly."""
    text_with_trailing = '''{"summary": "Test summary", "confidence": 85}

Additional text after JSON that should be ignored.
Final Answer: This text should also be ignored.'''

    cleaned = clean_json_from_text(text_with_trailing)
    expected = '{"summary": "Test summary", "confidence": 85}'
    assert cleaned == expected


def test_clean_json_from_text_with_markdown():
    """Test that clean_json_from_text handles markdown formatting correctly."""
    text_with_markdown = '''```json
{"summary": "Test summary with markdown", "confidence": 90}
```'''

    cleaned = clean_json_from_text(text_with_markdown)
    expected = '{"summary": "Test summary with markdown", "confidence": 90}'
    assert cleaned == expected


def test_clean_json_from_text_with_prefix():
    """Test that clean_json_from_text handles text prefix correctly."""
    text_with_prefix = '''Final Answer: {"summary": "Test summary with prefix", "confidence": 95}'''

    cleaned = clean_json_from_text(text_with_prefix)
    expected = '{"summary": "Test summary with prefix", "confidence": 95}'
    assert cleaned == expected


def test_clean_json_from_text_pure_json():
    """Test that clean_json_from_text handles pure JSON correctly."""
    pure_json = '{"summary": "Pure JSON", "confidence": 100}'

    cleaned = clean_json_from_text(pure_json)
    assert cleaned == pure_json


def test_clean_json_from_text_no_json():
    """Test that clean_json_from_text returns the original text when no JSON is found."""
    no_json_text = "This is just plain text with no JSON"

    cleaned = clean_json_from_text(no_json_text)
    assert cleaned == no_json_text


def test_clean_json_from_text_invalid_json():
    """Test that clean_json_from_text handles invalid JSON gracefully."""
    invalid_json = '{"summary": "Invalid JSON", "confidence":}'

    cleaned = clean_json_from_text(invalid_json)
    assert cleaned == invalid_json


def test_clean_json_from_text_multiple_json_objects():
    """Test that clean_json_from_text returns the first valid JSON object."""
    multiple_json = '''{"summary": "First JSON", "confidence": 80}

Some text in between.

{"summary": "Second JSON", "confidence": 90}'''

    cleaned = clean_json_from_text(multiple_json)
    expected = '{"summary": "First JSON", "confidence": 80}'
    assert cleaned == expected


def test_clean_json_from_text_nested_json():
    """Test that clean_json_from_text handles nested JSON correctly."""
    nested_json = '''{"summary": "Nested test", "details": {"score": 95, "category": "A"}, "confidence": 85}'''

    cleaned = clean_json_from_text(nested_json)
    assert cleaned == nested_json


def test_clean_json_from_text_with_complex_trailing():
    """Test the exact scenario from GitHub issue #3191."""
    github_issue_text = '''{"valid": true, "feedback": null}

Agent failed to reach a final answer. This is likely a bug - please report it.
Error details: maximum recursion depth exceeded in comparison'''

    cleaned = clean_json_from_text(github_issue_text)
    expected = '{"valid": true, "feedback": null}'
    assert cleaned == expected
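The LiteAgent tests added in the next hunk rely on the wiring described in the commit message: the raw LLM response is passed through clean_json_from_text() before Pydantic's model_validate_json(). A minimal sketch of that order of operations, assuming a generic Pydantic response model (the helper name _coerce_structured_output and its signature are hypothetical, not crewAI API):

# Hypothetical helper illustrating only the clean-then-validate order;
# not the actual LiteAgent implementation.
from pydantic import BaseModel
from crewai.utilities.crew_pydantic_output_parser import clean_json_from_text


def _coerce_structured_output(raw_response: str, response_format: type[BaseModel]) -> BaseModel:
    # Strip markdown fences, prefixes, and trailing text first...
    cleaned = clean_json_from_text(raw_response)
    # ...then validate the remaining JSON against the requested schema.
    return response_format.model_validate_json(cleaned)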
@@ -492,4 +492,102 @@ def test_lite_agent_with_invalid_llm():
        backstory="Test backstory",
        llm="invalid-model"
    )
    assert "Expected LLM instance of type BaseLLM" in str(exc_info.value)
    assert "Expected LLM instance of type BaseLLM" in str(exc_info.value)


def test_lite_agent_structured_output_with_trailing_characters():
    """Test that LiteAgent can handle JSON responses with trailing characters."""
    from unittest.mock import patch

    class SimpleOutput(BaseModel):
        summary: str = Field(description="A brief summary")
        confidence: int = Field(description="Confidence level from 1-100")

    mock_response_with_trailing = '''{"summary": "Test summary", "confidence": 85}

Additional text after JSON that should be ignored.
Final Answer: This text should also be ignored.'''

    with patch('crewai.lite_agent.get_llm_response') as mock_llm:
        mock_llm.return_value = mock_response_with_trailing

        agent = LiteAgent(
            role="Test Agent",
            goal="Test goal",
            backstory="Test backstory",
            llm=LLM(model="gpt-4o-mini"),
        )

        result = agent.kickoff(
            "Test message",
            response_format=SimpleOutput
        )

        assert result.pydantic is not None
        assert isinstance(result.pydantic, SimpleOutput)
        assert result.pydantic.summary == "Test summary"
        assert result.pydantic.confidence == 85


def test_lite_agent_structured_output_with_markdown():
    """Test that LiteAgent can handle JSON responses wrapped in markdown."""
    from unittest.mock import patch

    class SimpleOutput(BaseModel):
        summary: str = Field(description="A brief summary")
        confidence: int = Field(description="Confidence level from 1-100")

    mock_response_with_markdown = '''```json
{"summary": "Test summary with markdown", "confidence": 90}
```'''

    with patch('crewai.lite_agent.get_llm_response') as mock_llm:
        mock_llm.return_value = mock_response_with_markdown

        agent = LiteAgent(
            role="Test Agent",
            goal="Test goal",
            backstory="Test backstory",
            llm=LLM(model="gpt-4o-mini"),
        )

        result = agent.kickoff(
            "Test message",
            response_format=SimpleOutput
        )

        assert result.pydantic is not None
        assert isinstance(result.pydantic, SimpleOutput)
        assert result.pydantic.summary == "Test summary with markdown"
        assert result.pydantic.confidence == 90


def test_lite_agent_structured_output_with_final_answer_prefix():
    """Test that LiteAgent can handle JSON responses with Final Answer prefix."""
    from unittest.mock import patch

    class SimpleOutput(BaseModel):
        summary: str = Field(description="A brief summary")
        confidence: int = Field(description="Confidence level from 1-100")

    mock_response_with_prefix = '''Final Answer: {"summary": "Test summary with prefix", "confidence": 95}'''

    with patch('crewai.lite_agent.get_llm_response') as mock_llm:
        mock_llm.return_value = mock_response_with_prefix

        agent = LiteAgent(
            role="Test Agent",
            goal="Test goal",
            backstory="Test backstory",
            llm=LLM(model="gpt-4o-mini"),
        )

        result = agent.kickoff(
            "Test message",
            response_format=SimpleOutput
        )

        assert result.pydantic is not None
        assert isinstance(result.pydantic, SimpleOutput)
        assert result.pydantic.summary == "Test summary with prefix"
        assert result.pydantic.confidence == 95
@@ -302,3 +302,78 @@ def test_hallucination_guardrail_description_in_events():
    event = LLMGuardrailStartedEvent(guardrail=guardrail, retry_count=0)
    assert event.guardrail == "HallucinationGuardrail (no-op)"


def test_llm_guardrail_with_trailing_characters():
    """Test that LLMGuardrail can handle responses with trailing characters."""
    from unittest.mock import patch

    mock_response_with_trailing = '''{"valid": true, "feedback": null}

Some additional text that should be ignored.
More trailing content.'''

    with patch('crewai.Agent.kickoff') as mock_kickoff:
        from crewai.agent import LiteAgentOutput
        from crewai.tasks.llm_guardrail import LLMGuardrailResult

        mock_output = LiteAgentOutput(
            raw=mock_response_with_trailing,
            pydantic=LLMGuardrailResult(valid=True, feedback=None),
            agent_role="Guardrail Agent",
            usage_metrics=None
        )
        mock_kickoff.return_value = mock_output

        guardrail = LLMGuardrail(
            description="Test guardrail",
            llm=LLM(model="gpt-4o-mini")
        )

        task_output = TaskOutput(
            raw="Test task output",
            description="Test task",
            expected_output="Output",
            agent="Test Agent",
        )

        result = guardrail(task_output)
        assert result[0] is True
        assert result[1] == "Test task output"


def test_llm_guardrail_with_markdown_formatting():
    """Test that LLMGuardrail can handle responses with markdown formatting."""
    from unittest.mock import patch

    mock_response_with_markdown = '''```json
{"valid": false, "feedback": "The output does not meet the requirements"}
```'''

    with patch('crewai.Agent.kickoff') as mock_kickoff:
        from crewai.agent import LiteAgentOutput
        from crewai.tasks.llm_guardrail import LLMGuardrailResult

        mock_output = LiteAgentOutput(
            raw=mock_response_with_markdown,
            pydantic=LLMGuardrailResult(valid=False, feedback="The output does not meet the requirements"),
            agent_role="Guardrail Agent",
            usage_metrics=None
        )
        mock_kickoff.return_value = mock_output

        guardrail = LLMGuardrail(
            description="Test guardrail",
            llm=LLM(model="gpt-4o-mini")
        )

        task_output = TaskOutput(
            raw="Test task output",
            description="Test task",
            expected_output="Output",
            agent="Test Agent",
        )

        result = guardrail(task_output)
        assert result[0] is False
        assert result[1] == "The output does not meet the requirements"