From b7cb0186bd7f1fbf6e30a1a05ad422dea10235c6 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Sat, 19 Jul 2025 22:56:22 +0000
Subject: [PATCH] Fix LLMGuardrailResult JSON parsing with trailing characters

- Extract robust JSON cleaning logic into shared clean_json_from_text() function
- Update LiteAgent to use clean_json_from_text() before model_validate_json()
- Add comprehensive test cases for JSON with trailing characters, markdown formatting, and prefixes
- Fixes GitHub issue #3191 where valid JSON failed to parse due to trailing text
- Maintains backward compatibility with existing JSON parsing behavior

Co-Authored-By: João
---
 src/crewai/lite_agent.py                      |   7 +-
 .../utilities/crew_pydantic_output_parser.py  |  32 +++---
 tests/test_json_parsing_fix.py                |  97 +++++++++++++++++
 tests/test_lite_agent.py                      | 100 +++++++++++++++++-
 tests/test_task_guardrails.py                 |  75 +++++++++++++
 5 files changed, 292 insertions(+), 19 deletions(-)
 create mode 100644 tests/test_json_parsing_fix.py

diff --git a/src/crewai/lite_agent.py b/src/crewai/lite_agent.py
index e14f4576f..3f0f51aa3 100644
--- a/src/crewai/lite_agent.py
+++ b/src/crewai/lite_agent.py
@@ -62,6 +62,7 @@ from crewai.utilities.agent_utils import (
     render_text_description_and_args,
 )
 from crewai.utilities.converter import generate_model_description
+from crewai.utilities.crew_pydantic_output_parser import clean_json_from_text
 from crewai.utilities.events.agent_events import (
     AgentLogsExecutionEvent,
     LiteAgentExecutionCompletedEvent,
@@ -355,8 +356,8 @@ class LiteAgent(FlowTrackable, BaseModel):
         formatted_result: Optional[BaseModel] = None
         if self.response_format:
             try:
-                # Cast to BaseModel to ensure type safety
-                result = self.response_format.model_validate_json(agent_finish.output)
+                cleaned_output = clean_json_from_text(agent_finish.output)
+                result = self.response_format.model_validate_json(cleaned_output)
                 if isinstance(result, BaseModel):
                     formatted_result = result
             except Exception as e:
@@ -622,4 +623,4 @@ class LiteAgent(FlowTrackable, BaseModel):
 
     def _append_message(self, text: str, role: str = "assistant") -> None:
         """Append a message to the message list with the given role."""
-        self._messages.append(format_message_for_llm(text, role=role))
\ No newline at end of file
+        self._messages.append(format_message_for_llm(text, role=role))
diff --git a/src/crewai/utilities/crew_pydantic_output_parser.py b/src/crewai/utilities/crew_pydantic_output_parser.py
index d0dbfae06..a5fc9c7d2 100644
--- a/src/crewai/utilities/crew_pydantic_output_parser.py
+++ b/src/crewai/utilities/crew_pydantic_output_parser.py
@@ -8,6 +8,22 @@ from crewai.agents.parser import OutputParserException
 
 """Parser for converting text outputs into Pydantic models."""
 
+
+def clean_json_from_text(text: str) -> str:
+    """Extract and clean JSON from text that may contain markdown or trailing characters."""
+    text = text.replace("```", "").replace("json", "")
+    json_pattern = r"\{(?:[^{}]|(?R))*\}"
+    matches = regex.finditer(json_pattern, text)
+
+    for match in matches:
+        try:
+            json_obj = json.loads(match.group())
+            json_obj = json.dumps(json_obj)
+            return str(json_obj)
+        except json.JSONDecodeError:
+            continue
+    return text
+
 class CrewPydanticOutputParser:
     """Parses text outputs into specified Pydantic models."""
 
@@ -30,18 +46,4 @@
         raise OutputParserException(error=msg)
 
     def _transform_in_valid_json(self, text) -> str:
-        text = text.replace("```", "").replace("json", "")
-        json_pattern = r"\{(?:[^{}]|(?R))*\}"
-        matches = regex.finditer(json_pattern, text)
-
-        for match in matches:
-            try:
-                # Attempt to parse the matched string as JSON
-                json_obj = json.loads(match.group())
-                # Return the first successfully parsed JSON object
-                json_obj = json.dumps(json_obj)
-                return str(json_obj)
-            except json.JSONDecodeError:
-                # If parsing fails, skip to the next match
-                continue
-        return text
+        return clean_json_from_text(text)
diff --git a/tests/test_json_parsing_fix.py b/tests/test_json_parsing_fix.py
new file mode 100644
index 000000000..32e7abeee
--- /dev/null
+++ b/tests/test_json_parsing_fix.py
@@ -0,0 +1,97 @@
+import pytest
+from pydantic import BaseModel, Field
+from crewai.utilities.crew_pydantic_output_parser import clean_json_from_text
+
+
+class TestOutput(BaseModel):
+    summary: str = Field(description="A brief summary")
+    confidence: int = Field(description="Confidence level from 1-100")
+
+
+def test_clean_json_from_text_with_trailing_characters():
+    """Test that clean_json_from_text handles trailing characters correctly."""
+    text_with_trailing = '''{"summary": "Test summary", "confidence": 85}
+
+Additional text after JSON that should be ignored.
+Final Answer: This text should also be ignored.'''
+
+    cleaned = clean_json_from_text(text_with_trailing)
+    expected = '{"summary": "Test summary", "confidence": 85}'
+    assert cleaned == expected
+
+
+def test_clean_json_from_text_with_markdown():
+    """Test that clean_json_from_text handles markdown formatting correctly."""
+    text_with_markdown = '''```json
+{"summary": "Test summary with markdown", "confidence": 90}
+```'''
+
+    cleaned = clean_json_from_text(text_with_markdown)
+    expected = '{"summary": "Test summary with markdown", "confidence": 90}'
+    assert cleaned == expected
+
+
+def test_clean_json_from_text_with_prefix():
+    """Test that clean_json_from_text handles text prefix correctly."""
+    text_with_prefix = '''Final Answer: {"summary": "Test summary with prefix", "confidence": 95}'''
+
+    cleaned = clean_json_from_text(text_with_prefix)
+    expected = '{"summary": "Test summary with prefix", "confidence": 95}'
+    assert cleaned == expected
+
+
+def test_clean_json_from_text_pure_json():
+    """Test that clean_json_from_text handles pure JSON correctly."""
+    pure_json = '{"summary": "Pure JSON", "confidence": 100}'
+
+    cleaned = clean_json_from_text(pure_json)
+    assert cleaned == pure_json
+
+
+def test_clean_json_from_text_no_json():
+    """Test that clean_json_from_text returns original text when no JSON found."""
+    no_json_text = "This is just plain text with no JSON"
+
+    cleaned = clean_json_from_text(no_json_text)
+    assert cleaned == no_json_text
+
+
+def test_clean_json_from_text_invalid_json():
+    """Test that clean_json_from_text handles invalid JSON gracefully."""
+    invalid_json = '{"summary": "Invalid JSON", "confidence":}'
+
+    cleaned = clean_json_from_text(invalid_json)
+    assert cleaned == invalid_json
+
+
+def test_clean_json_from_text_multiple_json_objects():
+    """Test that clean_json_from_text returns the first valid JSON object."""
+    multiple_json = '''{"summary": "First JSON", "confidence": 80}
+
+Some text in between. 
+ +{"summary": "Second JSON", "confidence": 90}''' + + cleaned = clean_json_from_text(multiple_json) + expected = '{"summary": "First JSON", "confidence": 80}' + assert cleaned == expected + + +def test_clean_json_from_text_nested_json(): + """Test that clean_json_from_text handles nested JSON correctly.""" + nested_json = '''{"summary": "Nested test", "details": {"score": 95, "category": "A"}, "confidence": 85}''' + + cleaned = clean_json_from_text(nested_json) + assert cleaned == nested_json + + +def test_clean_json_from_text_with_complex_trailing(): + """Test the exact scenario from GitHub issue #3191.""" + github_issue_text = '''{"valid": true, "feedback": null} + +Agent failed to reach a final answer. This is likely a bug - please report it. +Error details: maximum recursion depth exceeded in comparison''' + + cleaned = clean_json_from_text(github_issue_text) + expected = '{"valid": true, "feedback": null}' + assert cleaned == expected diff --git a/tests/test_lite_agent.py b/tests/test_lite_agent.py index b495cb800..ab58b4672 100644 --- a/tests/test_lite_agent.py +++ b/tests/test_lite_agent.py @@ -492,4 +492,102 @@ def test_lite_agent_with_invalid_llm(): backstory="Test backstory", llm="invalid-model" ) - assert "Expected LLM instance of type BaseLLM" in str(exc_info.value) \ No newline at end of file + assert "Expected LLM instance of type BaseLLM" in str(exc_info.value) + + +def test_lite_agent_structured_output_with_trailing_characters(): + """Test that LiteAgent can handle JSON responses with trailing characters.""" + from unittest.mock import patch + + class SimpleOutput(BaseModel): + summary: str = Field(description="A brief summary") + confidence: int = Field(description="Confidence level from 1-100") + + mock_response_with_trailing = '''{"summary": "Test summary", "confidence": 85} + +Additional text after JSON that should be ignored. 
+Final Answer: This text should also be ignored.''' + + with patch('crewai.lite_agent.get_llm_response') as mock_llm: + mock_llm.return_value = mock_response_with_trailing + + agent = LiteAgent( + role="Test Agent", + goal="Test goal", + backstory="Test backstory", + llm=LLM(model="gpt-4o-mini"), + ) + + result = agent.kickoff( + "Test message", + response_format=SimpleOutput + ) + + assert result.pydantic is not None + assert isinstance(result.pydantic, SimpleOutput) + assert result.pydantic.summary == "Test summary" + assert result.pydantic.confidence == 85 + + +def test_lite_agent_structured_output_with_markdown(): + """Test that LiteAgent can handle JSON responses wrapped in markdown.""" + from unittest.mock import patch + + class SimpleOutput(BaseModel): + summary: str = Field(description="A brief summary") + confidence: int = Field(description="Confidence level from 1-100") + + mock_response_with_markdown = '''```json +{"summary": "Test summary with markdown", "confidence": 90} +```''' + + with patch('crewai.lite_agent.get_llm_response') as mock_llm: + mock_llm.return_value = mock_response_with_markdown + + agent = LiteAgent( + role="Test Agent", + goal="Test goal", + backstory="Test backstory", + llm=LLM(model="gpt-4o-mini"), + ) + + result = agent.kickoff( + "Test message", + response_format=SimpleOutput + ) + + assert result.pydantic is not None + assert isinstance(result.pydantic, SimpleOutput) + assert result.pydantic.summary == "Test summary with markdown" + assert result.pydantic.confidence == 90 + + +def test_lite_agent_structured_output_with_final_answer_prefix(): + """Test that LiteAgent can handle JSON responses with Final Answer prefix.""" + from unittest.mock import patch + + class SimpleOutput(BaseModel): + summary: str = Field(description="A brief summary") + confidence: int = Field(description="Confidence level from 1-100") + + mock_response_with_prefix = '''Final Answer: {"summary": "Test summary with prefix", "confidence": 95}''' + + with patch('crewai.lite_agent.get_llm_response') as mock_llm: + mock_llm.return_value = mock_response_with_prefix + + agent = LiteAgent( + role="Test Agent", + goal="Test goal", + backstory="Test backstory", + llm=LLM(model="gpt-4o-mini"), + ) + + result = agent.kickoff( + "Test message", + response_format=SimpleOutput + ) + + assert result.pydantic is not None + assert isinstance(result.pydantic, SimpleOutput) + assert result.pydantic.summary == "Test summary with prefix" + assert result.pydantic.confidence == 95 diff --git a/tests/test_task_guardrails.py b/tests/test_task_guardrails.py index 901b962b9..457f91a5e 100644 --- a/tests/test_task_guardrails.py +++ b/tests/test_task_guardrails.py @@ -302,3 +302,78 @@ def test_hallucination_guardrail_description_in_events(): event = LLMGuardrailStartedEvent(guardrail=guardrail, retry_count=0) assert event.guardrail == "HallucinationGuardrail (no-op)" + + +def test_llm_guardrail_with_trailing_characters(): + """Test that LLMGuardrail can handle responses with trailing characters.""" + from unittest.mock import patch + + mock_response_with_trailing = '''{"valid": true, "feedback": null} + +Some additional text that should be ignored. 
+More trailing content.''' + + with patch('crewai.Agent.kickoff') as mock_kickoff: + from crewai.agent import LiteAgentOutput + from crewai.tasks.llm_guardrail import LLMGuardrailResult + + mock_output = LiteAgentOutput( + raw=mock_response_with_trailing, + pydantic=LLMGuardrailResult(valid=True, feedback=None), + agent_role="Guardrail Agent", + usage_metrics=None + ) + mock_kickoff.return_value = mock_output + + guardrail = LLMGuardrail( + description="Test guardrail", + llm=LLM(model="gpt-4o-mini") + ) + + task_output = TaskOutput( + raw="Test task output", + description="Test task", + expected_output="Output", + agent="Test Agent", + ) + + result = guardrail(task_output) + assert result[0] is True + assert result[1] == "Test task output" + + +def test_llm_guardrail_with_markdown_formatting(): + """Test that LLMGuardrail can handle responses with markdown formatting.""" + from unittest.mock import patch + + mock_response_with_markdown = '''```json +{"valid": false, "feedback": "The output does not meet the requirements"} +```''' + + with patch('crewai.Agent.kickoff') as mock_kickoff: + from crewai.agent import LiteAgentOutput + from crewai.tasks.llm_guardrail import LLMGuardrailResult + + mock_output = LiteAgentOutput( + raw=mock_response_with_markdown, + pydantic=LLMGuardrailResult(valid=False, feedback="The output does not meet the requirements"), + agent_role="Guardrail Agent", + usage_metrics=None + ) + mock_kickoff.return_value = mock_output + + guardrail = LLMGuardrail( + description="Test guardrail", + llm=LLM(model="gpt-4o-mini") + ) + + task_output = TaskOutput( + raw="Test task output", + description="Test task", + expected_output="Output", + agent="Test Agent", + ) + + result = guardrail(task_output) + assert result[0] is False + assert result[1] == "The output does not meet the requirements"
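
A quick way to exercise the new helper by hand (a minimal sketch, assuming this patch is applied; the import path comes from the diff above, and the sample strings are illustrative only):

    from crewai.utilities.crew_pydantic_output_parser import clean_json_from_text

    # Trailing text after the JSON object (the issue #3191 failure mode) is dropped.
    raw = '{"valid": true, "feedback": null}\n\nAgent failed to reach a final answer.'
    print(clean_json_from_text(raw))     # {"valid": true, "feedback": null}

    # Markdown code fences are stripped before the JSON object is matched.
    fenced = '```json\n{"valid": false, "feedback": "Too short"}\n```'
    print(clean_json_from_text(fenced))  # {"valid": false, "feedback": "Too short"}

Note that the helper removes every literal ``` and "json" substring before matching, behavior carried over unchanged from the old _transform_in_valid_json, so it is a best-effort recovery heuristic rather than a strict parser.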