Mirror of https://github.com/crewAIInc/crewAI.git, synced 2026-01-23 23:28:15 +00:00
Fix LLMGuardrailResult JSON parsing with trailing characters
- Extract robust JSON cleaning logic into a shared clean_json_from_text() function (see the sketch below)
- Update LiteAgent to use clean_json_from_text() before model_validate_json()
- Add comprehensive test cases for JSON with trailing characters, markdown formatting, and prefixes
- Fixes GitHub issue #3191, where valid JSON failed to parse due to trailing text
- Maintains backward compatibility with existing JSON parsing behavior

Co-Authored-By: João <joao@crewai.com>
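For context, here is a minimal sketch of what a shared clean_json_from_text() helper with the behaviour the new tests exercise could look like: pick the first parseable JSON object, tolerate markdown fences and "Final Answer:"-style prefixes, and return the input unchanged when nothing parses. The function name and module path (crewai.utilities.crew_pydantic_output_parser) come from the commit itself; the internal logic shown here is an assumption, not the actual crewAI implementation.

# Illustrative sketch only; the real implementation in
# crewai.utilities.crew_pydantic_output_parser may differ.
import json
import re


def clean_json_from_text(text: str) -> str:
    """Return the first valid JSON object embedded in text, or text unchanged."""
    # Drop common markdown fences such as ```json ... ``` before scanning.
    stripped = re.sub(r"```(?:json)?", "", text).strip()

    # Try to decode a JSON object at each opening brace; the first one that
    # parses wins, and anything after its closing brace is ignored.
    decoder = json.JSONDecoder()
    for match in re.finditer(r"\{", stripped):
        try:
            _, end = decoder.raw_decode(stripped[match.start():])
            return stripped[match.start():match.start() + end]
        except json.JSONDecodeError:
            continue

    # Nothing parseable (plain text or malformed JSON): return the input as-is.
    return text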
tests/test_json_parsing_fix.py (new file, 97 lines)
@@ -0,0 +1,97 @@
import pytest
from pydantic import BaseModel, Field
from crewai.utilities.crew_pydantic_output_parser import clean_json_from_text


class TestOutput(BaseModel):
    summary: str = Field(description="A brief summary")
    confidence: int = Field(description="Confidence level from 1-100")


def test_clean_json_from_text_with_trailing_characters():
    """Test that clean_json_from_text handles trailing characters correctly."""
    text_with_trailing = '''{"summary": "Test summary", "confidence": 85}

Additional text after JSON that should be ignored.
Final Answer: This text should also be ignored.'''

    cleaned = clean_json_from_text(text_with_trailing)
    expected = '{"summary": "Test summary", "confidence": 85}'
    assert cleaned == expected


def test_clean_json_from_text_with_markdown():
    """Test that clean_json_from_text handles markdown formatting correctly."""
    text_with_markdown = '''```json
{"summary": "Test summary with markdown", "confidence": 90}
```'''

    cleaned = clean_json_from_text(text_with_markdown)
    expected = '{"summary": "Test summary with markdown", "confidence": 90}'
    assert cleaned == expected


def test_clean_json_from_text_with_prefix():
    """Test that clean_json_from_text handles text prefix correctly."""
    text_with_prefix = '''Final Answer: {"summary": "Test summary with prefix", "confidence": 95}'''

    cleaned = clean_json_from_text(text_with_prefix)
    expected = '{"summary": "Test summary with prefix", "confidence": 95}'
    assert cleaned == expected


def test_clean_json_from_text_pure_json():
    """Test that clean_json_from_text handles pure JSON correctly."""
    pure_json = '{"summary": "Pure JSON", "confidence": 100}'

    cleaned = clean_json_from_text(pure_json)
    assert cleaned == pure_json


def test_clean_json_from_text_no_json():
    """Test that clean_json_from_text returns the original text when no JSON is found."""
    no_json_text = "This is just plain text with no JSON"

    cleaned = clean_json_from_text(no_json_text)
    assert cleaned == no_json_text


def test_clean_json_from_text_invalid_json():
    """Test that clean_json_from_text handles invalid JSON gracefully."""
    invalid_json = '{"summary": "Invalid JSON", "confidence":}'

    cleaned = clean_json_from_text(invalid_json)
    assert cleaned == invalid_json


def test_clean_json_from_text_multiple_json_objects():
    """Test that clean_json_from_text returns the first valid JSON object."""
    multiple_json = '''{"summary": "First JSON", "confidence": 80}

Some text in between.

{"summary": "Second JSON", "confidence": 90}'''

    cleaned = clean_json_from_text(multiple_json)
    expected = '{"summary": "First JSON", "confidence": 80}'
    assert cleaned == expected


def test_clean_json_from_text_nested_json():
    """Test that clean_json_from_text handles nested JSON correctly."""
    nested_json = '''{"summary": "Nested test", "details": {"score": 95, "category": "A"}, "confidence": 85}'''

    cleaned = clean_json_from_text(nested_json)
    assert cleaned == nested_json


def test_clean_json_from_text_with_complex_trailing():
    """Test the exact scenario from GitHub issue #3191."""
    github_issue_text = '''{"valid": true, "feedback": null}

Agent failed to reach a final answer. This is likely a bug - please report it.
Error details: maximum recursion depth exceeded in comparison'''

    cleaned = clean_json_from_text(github_issue_text)
    expected = '{"valid": true, "feedback": null}'
    assert cleaned == expected
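The LiteAgent tests added in the next hunk rely on the wiring described in the commit message: the raw LLM response is passed through clean_json_from_text() before Pydantic's model_validate_json(). A minimal sketch of that order of operations, assuming a generic Pydantic response model (the helper name _coerce_structured_output and its signature are hypothetical, not crewAI API):

# Hypothetical helper illustrating only the clean-then-validate order;
# not the actual LiteAgent implementation.
from pydantic import BaseModel
from crewai.utilities.crew_pydantic_output_parser import clean_json_from_text


def _coerce_structured_output(raw_response: str, response_format: type[BaseModel]) -> BaseModel:
    # Strip markdown fences, prefixes, and trailing text first...
    cleaned = clean_json_from_text(raw_response)
    # ...then validate the remaining JSON against the requested schema.
    return response_format.model_validate_json(cleaned)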
@@ -492,4 +492,102 @@ def test_lite_agent_with_invalid_llm():
        backstory="Test backstory",
        llm="invalid-model"
    )
    assert "Expected LLM instance of type BaseLLM" in str(exc_info.value)
    assert "Expected LLM instance of type BaseLLM" in str(exc_info.value)


def test_lite_agent_structured_output_with_trailing_characters():
    """Test that LiteAgent can handle JSON responses with trailing characters."""
    from unittest.mock import patch

    class SimpleOutput(BaseModel):
        summary: str = Field(description="A brief summary")
        confidence: int = Field(description="Confidence level from 1-100")

    mock_response_with_trailing = '''{"summary": "Test summary", "confidence": 85}

Additional text after JSON that should be ignored.
Final Answer: This text should also be ignored.'''

    with patch('crewai.lite_agent.get_llm_response') as mock_llm:
        mock_llm.return_value = mock_response_with_trailing

        agent = LiteAgent(
            role="Test Agent",
            goal="Test goal",
            backstory="Test backstory",
            llm=LLM(model="gpt-4o-mini"),
        )

        result = agent.kickoff(
            "Test message",
            response_format=SimpleOutput
        )

        assert result.pydantic is not None
        assert isinstance(result.pydantic, SimpleOutput)
        assert result.pydantic.summary == "Test summary"
        assert result.pydantic.confidence == 85


def test_lite_agent_structured_output_with_markdown():
    """Test that LiteAgent can handle JSON responses wrapped in markdown."""
    from unittest.mock import patch

    class SimpleOutput(BaseModel):
        summary: str = Field(description="A brief summary")
        confidence: int = Field(description="Confidence level from 1-100")

    mock_response_with_markdown = '''```json
{"summary": "Test summary with markdown", "confidence": 90}
```'''

    with patch('crewai.lite_agent.get_llm_response') as mock_llm:
        mock_llm.return_value = mock_response_with_markdown

        agent = LiteAgent(
            role="Test Agent",
            goal="Test goal",
            backstory="Test backstory",
            llm=LLM(model="gpt-4o-mini"),
        )

        result = agent.kickoff(
            "Test message",
            response_format=SimpleOutput
        )

        assert result.pydantic is not None
        assert isinstance(result.pydantic, SimpleOutput)
        assert result.pydantic.summary == "Test summary with markdown"
        assert result.pydantic.confidence == 90


def test_lite_agent_structured_output_with_final_answer_prefix():
    """Test that LiteAgent can handle JSON responses with Final Answer prefix."""
    from unittest.mock import patch

    class SimpleOutput(BaseModel):
        summary: str = Field(description="A brief summary")
        confidence: int = Field(description="Confidence level from 1-100")

    mock_response_with_prefix = '''Final Answer: {"summary": "Test summary with prefix", "confidence": 95}'''

    with patch('crewai.lite_agent.get_llm_response') as mock_llm:
        mock_llm.return_value = mock_response_with_prefix

        agent = LiteAgent(
            role="Test Agent",
            goal="Test goal",
            backstory="Test backstory",
            llm=LLM(model="gpt-4o-mini"),
        )

        result = agent.kickoff(
            "Test message",
            response_format=SimpleOutput
        )

        assert result.pydantic is not None
        assert isinstance(result.pydantic, SimpleOutput)
        assert result.pydantic.summary == "Test summary with prefix"
        assert result.pydantic.confidence == 95
@@ -302,3 +302,78 @@ def test_hallucination_guardrail_description_in_events():
    event = LLMGuardrailStartedEvent(guardrail=guardrail, retry_count=0)
    assert event.guardrail == "HallucinationGuardrail (no-op)"


def test_llm_guardrail_with_trailing_characters():
    """Test that LLMGuardrail can handle responses with trailing characters."""
    from unittest.mock import patch

    mock_response_with_trailing = '''{"valid": true, "feedback": null}

Some additional text that should be ignored.
More trailing content.'''

    with patch('crewai.Agent.kickoff') as mock_kickoff:
        from crewai.agent import LiteAgentOutput
        from crewai.tasks.llm_guardrail import LLMGuardrailResult

        mock_output = LiteAgentOutput(
            raw=mock_response_with_trailing,
            pydantic=LLMGuardrailResult(valid=True, feedback=None),
            agent_role="Guardrail Agent",
            usage_metrics=None
        )
        mock_kickoff.return_value = mock_output

        guardrail = LLMGuardrail(
            description="Test guardrail",
            llm=LLM(model="gpt-4o-mini")
        )

        task_output = TaskOutput(
            raw="Test task output",
            description="Test task",
            expected_output="Output",
            agent="Test Agent",
        )

        result = guardrail(task_output)
        assert result[0] is True
        assert result[1] == "Test task output"


def test_llm_guardrail_with_markdown_formatting():
    """Test that LLMGuardrail can handle responses with markdown formatting."""
    from unittest.mock import patch

    mock_response_with_markdown = '''```json
{"valid": false, "feedback": "The output does not meet the requirements"}
```'''

    with patch('crewai.Agent.kickoff') as mock_kickoff:
        from crewai.agent import LiteAgentOutput
        from crewai.tasks.llm_guardrail import LLMGuardrailResult

        mock_output = LiteAgentOutput(
            raw=mock_response_with_markdown,
            pydantic=LLMGuardrailResult(valid=False, feedback="The output does not meet the requirements"),
            agent_role="Guardrail Agent",
            usage_metrics=None
        )
        mock_kickoff.return_value = mock_output

        guardrail = LLMGuardrail(
            description="Test guardrail",
            llm=LLM(model="gpt-4o-mini")
        )

        task_output = TaskOutput(
            raw="Test task output",
            description="Test task",
            expected_output="Output",
            agent="Test Agent",
        )

        result = guardrail(task_output)
        assert result[0] is False
        assert result[1] == "The output does not meet the requirements"