fix: integrate robust JSON validation for lite agent structured outputs

- Replace simple model_validate_json with convert_to_model function
- Add get_output_converter method to support converter pattern
- Handle malformed JSON wrapped in markdown code blocks
- Add comprehensive test for malformed JSON extraction
- Fixes #3480

Co-Authored-By: João <joao@crewai.com>
Author: Devin AI
Date: 2025-09-09 13:17:43 +00:00
parent d5126d159b
commit 934c63ede1
2 changed files with 67 additions and 4 deletions
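Note on the failure mode: Pydantic's model_validate_json expects a bare JSON string, so a response wrapped in a markdown code fence fails validation before any field checking happens. A minimal illustration of the pre-fix behavior (the FounderNames model and the fenced string below are taken from the new test's scenario, not from crewai's code):

```python
from pydantic import BaseModel, Field, ValidationError


class FounderNames(BaseModel):
    names: list[str] = Field(description="List of founder names")


# Typical LLM output: valid JSON, but wrapped in a markdown code fence.
fenced_output = '```json\n{"names": ["John Smith", "Jane Doe"]}\n```'

try:
    # The old LiteAgent path validated this directly; it raises because the
    # backticks make the payload invalid JSON.
    FounderNames.model_validate_json(fenced_output)
except ValidationError as exc:
    print(f"Direct validation fails: {exc.errors()[0]['type']}")
```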


@@ -44,6 +44,7 @@ from crewai.llm import LLM, BaseLLM
 from crewai.tools.base_tool import BaseTool
 from crewai.tools.structured_tool import CrewStructuredTool
 from crewai.utilities import I18N
+from crewai.utilities.converter import convert_to_model
 from crewai.utilities.guardrail import process_guardrail
 from crewai.utilities.agent_utils import (
     enforce_rpm_limit,
@@ -354,10 +355,15 @@ class LiteAgent(FlowTrackable, BaseModel):
         formatted_result: Optional[BaseModel] = None
         if self.response_format:
             try:
-                # Cast to BaseModel to ensure type safety
-                result = self.response_format.model_validate_json(agent_finish.output)
-                if isinstance(result, BaseModel):
-                    formatted_result = result
+                converted_result = convert_to_model(
+                    result=agent_finish.output,
+                    output_pydantic=self.response_format,
+                    output_json=None,
+                    agent=self,
+                    converter_cls=None,
+                )
+                if isinstance(converted_result, BaseModel):
+                    formatted_result = converted_result
             except Exception as e:
                 self._printer.print(
                     content=f"Failed to parse output into response format: {str(e)}",
@@ -596,3 +602,13 @@ class LiteAgent(FlowTrackable, BaseModel):
     def _append_message(self, text: str, role: str = "assistant") -> None:
         """Append a message to the message list with the given role."""
         self._messages.append(format_message_for_llm(text, role=role))
+
+    def get_output_converter(self, llm, model, instructions):
+        """Get the converter class for the agent to create json/pydantic outputs."""
+        from crewai.utilities.converter import Converter
+
+        return Converter(
+            text="",
+            llm=llm,
+            model=model,
+            instructions=instructions,
+        )
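get_output_converter mirrors the method the full Agent class already exposes, presumably so that convert_to_model's fallback path can ask the LiteAgent for a converter when plain parsing fails. A rough usage sketch, assuming the Converter exposes a to_pydantic() method as the converter pattern elsewhere in crewai suggests:

```python
# Illustrative fallback wiring, not the actual crewai call path.
def convert_with_agent_llm(agent, model, instructions: str):
    converter = agent.get_output_converter(
        llm=agent.llm,
        model=model,
        instructions=instructions,
    )
    # Assumed API: to_pydantic() asks the LLM to reshape text into the schema.
    return converter.to_pydantic()
```

The import of Converter inside the method body, rather than at module level, is likely there to avoid a circular import between the agent and converter modules.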


@@ -520,6 +520,53 @@ def test_lite_agent_with_custom_llm_and_guardrails():
     assert result2.raw == "Modified by guardrail"
 
 
+def test_lite_agent_structured_output_with_malformed_json():
+    """Test that LiteAgent can handle malformed JSON wrapped in markdown blocks."""
+
+    class FounderNames(BaseModel):
+        names: list[str] = Field(description="List of founder names")
+
+    class MockLLMWithMalformedJSON(BaseLLM):
+        def __init__(self):
+            super().__init__(model="mock-model")
+
+        def call(self, messages, **kwargs):
+            return '''Thought: I need to extract the founder names
+Final Answer: ```json
+{
+    "names": ["John Smith", "Jane Doe"]
+}
+```'''
+
+        def supports_function_calling(self):
+            return False
+
+        def supports_stop_words(self):
+            return False
+
+        def get_context_window_size(self):
+            return 4096
+
+    mock_llm = MockLLMWithMalformedJSON()
+
+    agent = Agent(
+        role="Data Extraction Specialist",
+        goal="Extract founder names from text",
+        backstory="You extract and structure information accurately.",
+        llm=mock_llm,
+        verbose=True,
+    )
+
+    result = agent.kickoff(
+        messages="Extract founder names from: 'The company was founded by John Smith and Jane Doe.'",
+        response_format=FounderNames
+    )
+
+    assert result.pydantic is not None, "Should successfully parse malformed JSON"
+    assert isinstance(result.pydantic, FounderNames), "Should return correct Pydantic model"
+    assert result.pydantic.names == ["John Smith", "Jane Doe"], "Should extract correct founder names"
+
+
 @pytest.mark.vcr(filter_headers=["authorization"])
 def test_lite_agent_with_invalid_llm():
     """Test that LiteAgent raises proper error when create_llm returns None."""