Compare commits

...

2 Commits

Author SHA1 Message Date
Devin AI
fd5cd14eb0 Fix lint issue: remove unused pytest import
Co-Authored-By: João <joao@crewai.com>
2025-07-19 22:59:52 +00:00
Devin AI
b7cb0186bd Fix LLMGuardrailResult JSON parsing with trailing characters
- Extract robust JSON cleaning logic into shared clean_json_from_text() function
- Update LiteAgent to use clean_json_from_text() before model_validate_json()
- Add comprehensive test cases for JSON with trailing characters, markdown formatting, and prefixes
- Fixes GitHub issue #3191 where valid JSON failed to parse due to trailing text
- Maintains backward compatibility with existing JSON parsing behavior

Co-Authored-By: João <joao@crewai.com>
2025-07-19 22:56:22 +00:00
5 changed files with 291 additions and 19 deletions
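Taken together, the two commits route structured-output parsing through a shared cleaning helper. A minimal sketch of the intended behavior, using the import path shown in the diffs below; the sample input mirrors the issue #3191 test case further down:

```python
from crewai.utilities.crew_pydantic_output_parser import clean_json_from_text

# Raw LLM output with trailing prose, as reported in issue #3191.
raw = '''{"valid": true, "feedback": null}
Agent failed to reach a final answer. This is likely a bug - please report it.'''

# The helper extracts the first valid JSON object and drops the trailing text.
print(clean_json_from_text(raw))  # {"valid": true, "feedback": null}
```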

View File

@@ -62,6 +62,7 @@ from crewai.utilities.agent_utils import (
render_text_description_and_args,
)
from crewai.utilities.converter import generate_model_description
from crewai.utilities.crew_pydantic_output_parser import clean_json_from_text
from crewai.utilities.events.agent_events import (
AgentLogsExecutionEvent,
LiteAgentExecutionCompletedEvent,
@@ -355,8 +356,8 @@ class LiteAgent(FlowTrackable, BaseModel):
formatted_result: Optional[BaseModel] = None
if self.response_format:
try:
# Cast to BaseModel to ensure type safety
result = self.response_format.model_validate_json(agent_finish.output)
cleaned_output = clean_json_from_text(agent_finish.output)
result = self.response_format.model_validate_json(cleaned_output)
if isinstance(result, BaseModel):
formatted_result = result
except Exception as e:
@@ -622,4 +623,4 @@ class LiteAgent(FlowTrackable, BaseModel):
def _append_message(self, text: str, role: str = "assistant") -> None:
"""Append a message to the message list with the given role."""
self._messages.append(format_message_for_llm(text, role=role))
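For context, a minimal repro of the failure mode this hunk addresses, assuming Pydantic v2 semantics: `model_validate_json` parses strictly, so any trailing characters in the raw LLM output raise a `ValidationError`, which is why the output is now cleaned first:

```python
from pydantic import BaseModel, ValidationError

class Out(BaseModel):
    valid: bool

try:
    # Strict JSON parsing rejects the prose an LLM may append after the object.
    Out.model_validate_json('{"valid": true}\nFinal Answer: done')
except ValidationError as err:
    print(err)  # Invalid JSON: trailing characters ...
```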

View File

@@ -8,6 +8,22 @@ from crewai.agents.parser import OutputParserException
"""Parser for converting text outputs into Pydantic models."""
def clean_json_from_text(text: str) -> str:
"""Extract and clean JSON from text that may contain markdown or trailing characters."""
text = text.replace("```", "").replace("json", "")
json_pattern = r"\{(?:[^{}]|(?R))*\}"
matches = regex.finditer(json_pattern, text)
for match in matches:
try:
json_obj = json.loads(match.group())
json_obj = json.dumps(json_obj)
return str(json_obj)
except json.JSONDecodeError:
continue
return text
class CrewPydanticOutputParser:
"""Parses text outputs into specified Pydantic models."""
@@ -30,18 +46,4 @@ class CrewPydanticOutputParser:
raise OutputParserException(error=msg)
def _transform_in_valid_json(self, text) -> str:
text = text.replace("```", "").replace("json", "")
json_pattern = r"\{(?:[^{}]|(?R))*\}"
matches = regex.finditer(json_pattern, text)
for match in matches:
try:
# Attempt to parse the matched string as JSON
json_obj = json.loads(match.group())
# Return the first successfully parsed JSON object
json_obj = json.dumps(json_obj)
return str(json_obj)
except json.JSONDecodeError:
# If parsing fails, skip to the next match
continue
return text
return clean_json_from_text(text)
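The brace-matching pattern in this hunk relies on the third-party `regex` module (consistent with the `regex.finditer` call above): `(?R)` recursively re-applies the whole pattern, letting it match balanced nested braces, which the standard-library `re` module cannot do. A small sketch:

```python
import regex  # third-party 'regex' package; supports recursion, unlike re

json_pattern = r"\{(?:[^{}]|(?R))*\}"
text = 'Final Answer: {"outer": {"inner": 1}} trailing text'

# (?R) matches the nested {...} pair, so the whole object is captured.
print(regex.search(json_pattern, text).group())  # {"outer": {"inner": 1}}
```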

View File

@@ -0,0 +1,96 @@
from pydantic import BaseModel, Field
from crewai.utilities.crew_pydantic_output_parser import clean_json_from_text
class TestOutput(BaseModel):
summary: str = Field(description="A brief summary")
confidence: int = Field(description="Confidence level from 1-100")
def test_clean_json_from_text_with_trailing_characters():
"""Test that clean_json_from_text handles trailing characters correctly."""
text_with_trailing = '''{"summary": "Test summary", "confidence": 85}
Additional text after JSON that should be ignored.
Final Answer: This text should also be ignored.'''
cleaned = clean_json_from_text(text_with_trailing)
expected = '{"summary": "Test summary", "confidence": 85}'
assert cleaned == expected
def test_clean_json_from_text_with_markdown():
"""Test that clean_json_from_text handles markdown formatting correctly."""
text_with_markdown = '''```json
{"summary": "Test summary with markdown", "confidence": 90}
```'''
cleaned = clean_json_from_text(text_with_markdown)
expected = '{"summary": "Test summary with markdown", "confidence": 90}'
assert cleaned == expected
def test_clean_json_from_text_with_prefix():
"""Test that clean_json_from_text handles text prefix correctly."""
text_with_prefix = '''Final Answer: {"summary": "Test summary with prefix", "confidence": 95}'''
cleaned = clean_json_from_text(text_with_prefix)
expected = '{"summary": "Test summary with prefix", "confidence": 95}'
assert cleaned == expected
def test_clean_json_from_text_pure_json():
"""Test that clean_json_from_text handles pure JSON correctly."""
pure_json = '{"summary": "Pure JSON", "confidence": 100}'
cleaned = clean_json_from_text(pure_json)
assert cleaned == pure_json
def test_clean_json_from_text_no_json():
"""Test that clean_json_from_text returns original text when no JSON found."""
no_json_text = "This is just plain text with no JSON"
cleaned = clean_json_from_text(no_json_text)
assert cleaned == no_json_text
def test_clean_json_from_text_invalid_json():
"""Test that clean_json_from_text handles invalid JSON gracefully."""
invalid_json = '{"summary": "Invalid JSON", "confidence":}'
cleaned = clean_json_from_text(invalid_json)
assert cleaned == invalid_json
def test_clean_json_from_text_multiple_json_objects():
"""Test that clean_json_from_text returns the first valid JSON object."""
multiple_json = '''{"summary": "First JSON", "confidence": 80}
Some text in between.
{"summary": "Second JSON", "confidence": 90}'''
cleaned = clean_json_from_text(multiple_json)
expected = '{"summary": "First JSON", "confidence": 80}'
assert cleaned == expected
def test_clean_json_from_text_nested_json():
"""Test that clean_json_from_text handles nested JSON correctly."""
nested_json = '''{"summary": "Nested test", "details": {"score": 95, "category": "A"}, "confidence": 85}'''
cleaned = clean_json_from_text(nested_json)
assert cleaned == nested_json
def test_clean_json_from_text_with_complex_trailing():
"""Test the exact scenario from GitHub issue #3191."""
github_issue_text = '''{"valid": true, "feedback": null}
Agent failed to reach a final answer. This is likely a bug - please report it.
Error details: maximum recursion depth exceeded in comparison'''
cleaned = clean_json_from_text(github_issue_text)
expected = '{"valid": true, "feedback": null}'
assert cleaned == expected

View File

@@ -492,4 +492,102 @@ def test_lite_agent_with_invalid_llm():
backstory="Test backstory",
llm="invalid-model"
)
assert "Expected LLM instance of type BaseLLM" in str(exc_info.value)
assert "Expected LLM instance of type BaseLLM" in str(exc_info.value)
def test_lite_agent_structured_output_with_trailing_characters():
"""Test that LiteAgent can handle JSON responses with trailing characters."""
from unittest.mock import patch
class SimpleOutput(BaseModel):
summary: str = Field(description="A brief summary")
confidence: int = Field(description="Confidence level from 1-100")
mock_response_with_trailing = '''{"summary": "Test summary", "confidence": 85}
Additional text after JSON that should be ignored.
Final Answer: This text should also be ignored.'''
with patch('crewai.lite_agent.get_llm_response') as mock_llm:
mock_llm.return_value = mock_response_with_trailing
agent = LiteAgent(
role="Test Agent",
goal="Test goal",
backstory="Test backstory",
llm=LLM(model="gpt-4o-mini"),
)
result = agent.kickoff(
"Test message",
response_format=SimpleOutput
)
assert result.pydantic is not None
assert isinstance(result.pydantic, SimpleOutput)
assert result.pydantic.summary == "Test summary"
assert result.pydantic.confidence == 85
def test_lite_agent_structured_output_with_markdown():
"""Test that LiteAgent can handle JSON responses wrapped in markdown."""
from unittest.mock import patch
class SimpleOutput(BaseModel):
summary: str = Field(description="A brief summary")
confidence: int = Field(description="Confidence level from 1-100")
mock_response_with_markdown = '''```json
{"summary": "Test summary with markdown", "confidence": 90}
```'''
with patch('crewai.lite_agent.get_llm_response') as mock_llm:
mock_llm.return_value = mock_response_with_markdown
agent = LiteAgent(
role="Test Agent",
goal="Test goal",
backstory="Test backstory",
llm=LLM(model="gpt-4o-mini"),
)
result = agent.kickoff(
"Test message",
response_format=SimpleOutput
)
assert result.pydantic is not None
assert isinstance(result.pydantic, SimpleOutput)
assert result.pydantic.summary == "Test summary with markdown"
assert result.pydantic.confidence == 90
def test_lite_agent_structured_output_with_final_answer_prefix():
"""Test that LiteAgent can handle JSON responses with Final Answer prefix."""
from unittest.mock import patch
class SimpleOutput(BaseModel):
summary: str = Field(description="A brief summary")
confidence: int = Field(description="Confidence level from 1-100")
mock_response_with_prefix = '''Final Answer: {"summary": "Test summary with prefix", "confidence": 95}'''
with patch('crewai.lite_agent.get_llm_response') as mock_llm:
mock_llm.return_value = mock_response_with_prefix
agent = LiteAgent(
role="Test Agent",
goal="Test goal",
backstory="Test backstory",
llm=LLM(model="gpt-4o-mini"),
)
result = agent.kickoff(
"Test message",
response_format=SimpleOutput
)
assert result.pydantic is not None
assert isinstance(result.pydantic, SimpleOutput)
assert result.pydantic.summary == "Test summary with prefix"
assert result.pydantic.confidence == 95

View File

@@ -302,3 +302,78 @@ def test_hallucination_guardrail_description_in_events():
event = LLMGuardrailStartedEvent(guardrail=guardrail, retry_count=0)
assert event.guardrail == "HallucinationGuardrail (no-op)"
def test_llm_guardrail_with_trailing_characters():
"""Test that LLMGuardrail can handle responses with trailing characters."""
from unittest.mock import patch
mock_response_with_trailing = '''{"valid": true, "feedback": null}
Some additional text that should be ignored.
More trailing content.'''
with patch('crewai.Agent.kickoff') as mock_kickoff:
from crewai.agent import LiteAgentOutput
from crewai.tasks.llm_guardrail import LLMGuardrailResult
mock_output = LiteAgentOutput(
raw=mock_response_with_trailing,
pydantic=LLMGuardrailResult(valid=True, feedback=None),
agent_role="Guardrail Agent",
usage_metrics=None
)
mock_kickoff.return_value = mock_output
guardrail = LLMGuardrail(
description="Test guardrail",
llm=LLM(model="gpt-4o-mini")
)
task_output = TaskOutput(
raw="Test task output",
description="Test task",
expected_output="Output",
agent="Test Agent",
)
result = guardrail(task_output)
assert result[0] is True
assert result[1] == "Test task output"
def test_llm_guardrail_with_markdown_formatting():
"""Test that LLMGuardrail can handle responses with markdown formatting."""
from unittest.mock import patch
mock_response_with_markdown = '''```json
{"valid": false, "feedback": "The output does not meet the requirements"}
```'''
with patch('crewai.Agent.kickoff') as mock_kickoff:
from crewai.agent import LiteAgentOutput
from crewai.tasks.llm_guardrail import LLMGuardrailResult
mock_output = LiteAgentOutput(
raw=mock_response_with_markdown,
pydantic=LLMGuardrailResult(valid=False, feedback="The output does not meet the requirements"),
agent_role="Guardrail Agent",
usage_metrics=None
)
mock_kickoff.return_value = mock_output
guardrail = LLMGuardrail(
description="Test guardrail",
llm=LLM(model="gpt-4o-mini")
)
task_output = TaskOutput(
raw="Test task output",
description="Test task",
expected_output="Output",
agent="Test Agent",
)
result = guardrail(task_output)
assert result[0] is False
assert result[1] == "The output does not meet the requirements"