fix: integrate robust JSON validation for lite agent structured outputs

- Replace simple model_validate_json with convert_to_model function
- Add get_output_converter method to support converter pattern
- Handle malformed JSON wrapped in markdown code blocks
- Add comprehensive test for malformed JSON extraction
- Fixes #3480

Co-Authored-By: João <joao@crewai.com>
Author: Devin AI
Date: 2025-09-09 13:17:43 +00:00
parent d5126d159b
commit 934c63ede1
2 changed files with 67 additions and 4 deletions
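Note on the failure mode: Pydantic's model_validate_json expects a bare JSON string, so a response wrapped in a markdown code fence fails validation before any field checking happens. A minimal illustration of the pre-fix behavior (the FounderNames model and the fenced string below are taken from the new test's scenario, not from crewai's code):

```python
from pydantic import BaseModel, Field, ValidationError


class FounderNames(BaseModel):
    names: list[str] = Field(description="List of founder names")


# Typical LLM output: valid JSON, but wrapped in a markdown code fence.
fenced_output = '```json\n{"names": ["John Smith", "Jane Doe"]}\n```'

try:
    # The old LiteAgent path validated this directly; it raises because the
    # backticks make the payload invalid JSON.
    FounderNames.model_validate_json(fenced_output)
except ValidationError as exc:
    print(f"Direct validation fails: {exc.errors()[0]['type']}")
```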


@@ -44,6 +44,7 @@ from crewai.llm import LLM, BaseLLM
 from crewai.tools.base_tool import BaseTool
 from crewai.tools.structured_tool import CrewStructuredTool
 from crewai.utilities import I18N
+from crewai.utilities.converter import convert_to_model
 from crewai.utilities.guardrail import process_guardrail
 from crewai.utilities.agent_utils import (
     enforce_rpm_limit,
@@ -354,10 +355,15 @@ class LiteAgent(FlowTrackable, BaseModel):
         formatted_result: Optional[BaseModel] = None
         if self.response_format:
             try:
-                # Cast to BaseModel to ensure type safety
-                result = self.response_format.model_validate_json(agent_finish.output)
-                if isinstance(result, BaseModel):
-                    formatted_result = result
+                converted_result = convert_to_model(
+                    result=agent_finish.output,
+                    output_pydantic=self.response_format,
+                    output_json=None,
+                    agent=self,
+                    converter_cls=None,
+                )
+                if isinstance(converted_result, BaseModel):
+                    formatted_result = converted_result
             except Exception as e:
                 self._printer.print(
                     content=f"Failed to parse output into response format: {str(e)}",
@@ -596,3 +602,13 @@ class LiteAgent(FlowTrackable, BaseModel):
     def _append_message(self, text: str, role: str = "assistant") -> None:
         """Append a message to the message list with the given role."""
         self._messages.append(format_message_for_llm(text, role=role))
+
+    def get_output_converter(self, llm, model, instructions):
+        """Get the converter class for the agent to create json/pydantic outputs."""
+        from crewai.utilities.converter import Converter
+
+        return Converter(
+            text="",
+            llm=llm,
+            model=model,
+            instructions=instructions,
+        )
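get_output_converter mirrors the method the full Agent class already exposes, presumably so that convert_to_model's fallback path can ask the LiteAgent for a converter when plain parsing fails. A rough usage sketch, assuming the Converter exposes a to_pydantic() method as the converter pattern elsewhere in crewai suggests:

```python
# Illustrative fallback wiring, not the actual crewai call path.
def convert_with_agent_llm(agent, model, instructions: str):
    converter = agent.get_output_converter(
        llm=agent.llm,
        model=model,
        instructions=instructions,
    )
    # Assumed API: to_pydantic() asks the LLM to reshape text into the schema.
    return converter.to_pydantic()
```

The import of Converter inside the method body, rather than at module level, is likely there to avoid a circular import between the agent and converter modules.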


@@ -520,6 +520,53 @@ def test_lite_agent_with_custom_llm_and_guardrails():
     assert result2.raw == "Modified by guardrail"
 
 
+def test_lite_agent_structured_output_with_malformed_json():
+    """Test that LiteAgent can handle malformed JSON wrapped in markdown blocks."""
+
+    class FounderNames(BaseModel):
+        names: list[str] = Field(description="List of founder names")
+
+    class MockLLMWithMalformedJSON(BaseLLM):
+        def __init__(self):
+            super().__init__(model="mock-model")
+
+        def call(self, messages, **kwargs):
+            return '''Thought: I need to extract the founder names
+Final Answer: ```json
+{
+    "names": ["John Smith", "Jane Doe"]
+}
+```'''
+
+        def supports_function_calling(self):
+            return False
+
+        def supports_stop_words(self):
+            return False
+
+        def get_context_window_size(self):
+            return 4096
+
+    mock_llm = MockLLMWithMalformedJSON()
+
+    agent = Agent(
+        role="Data Extraction Specialist",
+        goal="Extract founder names from text",
+        backstory="You extract and structure information accurately.",
+        llm=mock_llm,
+        verbose=True,
+    )
+
+    result = agent.kickoff(
+        messages="Extract founder names from: 'The company was founded by John Smith and Jane Doe.'",
+        response_format=FounderNames
+    )
+
+    assert result.pydantic is not None, "Should successfully parse malformed JSON"
+    assert isinstance(result.pydantic, FounderNames), "Should return correct Pydantic model"
+    assert result.pydantic.names == ["John Smith", "Jane Doe"], "Should extract correct founder names"
+
+
 @pytest.mark.vcr(filter_headers=["authorization"])
 def test_lite_agent_with_invalid_llm():
     """Test that LiteAgent raises proper error when create_llm returns None."""