from crewai.utilities.xml_parser import (
extract_xml_content,
extract_all_xml_content,
extract_multiple_xml_tags,
remove_xml_tags,
strip_xml_tags_keep_content,
)
class TestXMLParserExamples:
    """Test XML parser with realistic agent output examples.

    Each test builds a multi-line agent transcript in which some sections
    are wrapped in XML-style tags (``<thinking>``, ``<reasoning>``, ...)
    and checks what the helpers imported from
    ``crewai.utilities.xml_parser`` return for it.
    """

    def test_agent_thinking_extraction(self) -> None:
        """Test extracting thinking content from agent output.

        The transcript holds a single ``<thinking>`` element surrounded by
        untagged text; the extracted value must contain the lines that sit
        inside that element.
        """
        agent_output = """
        I need to solve this problem step by step.

        <thinking>
        Let me break this down:
        1. First, I need to understand the requirements
        2. Then, I'll analyze the constraints
        3. Finally, I'll propose a solution

        The key insight is that we need to balance efficiency with accuracy.
        </thinking>

        Based on my analysis, here's my recommendation: Use approach A.
        """

        thinking = extract_xml_content(agent_output, "thinking")

        assert thinking is not None
        assert "break this down" in thinking
        assert "requirements" in thinking
        assert "constraints" in thinking
        assert "efficiency with accuracy" in thinking

    def test_multiple_reasoning_tags(self) -> None:
        """Test extracting multiple reasoning sections.

        The transcript holds two separate ``<reasoning>`` elements; both
        must be returned, in document order.
        """
        agent_output = """
        <reasoning>
        Initial analysis shows three possible approaches.
        </reasoning>

        Let me explore each option:

        <reasoning>
        Option A: Fast but less accurate
        Option B: Slow but very accurate
        Option C: Balanced approach
        </reasoning>

        My final recommendation is Option C.
        """

        reasoning_sections = extract_all_xml_content(agent_output, "reasoning")

        assert len(reasoning_sections) == 2
        assert "three possible approaches" in reasoning_sections[0]
        assert "Option A" in reasoning_sections[1]
        assert "Option B" in reasoning_sections[1]
        assert "Option C" in reasoning_sections[1]

    def test_complex_agent_workflow(self) -> None:
        """Test complex agent output with multiple tag types.

        Four differently named elements appear once each; the result is
        indexed by tag name and every entry must hold that element's text.
        """
        complex_output = """
        <thinking>
        This is a complex problem requiring systematic analysis.
        I need to consider multiple factors.
        </thinking>

        <analysis>
        Factor 1: Performance requirements
        Factor 2: Cost constraints
        Factor 3: Time limitations
        </analysis>

        <reasoning>
        Given the analysis above, I believe we should prioritize performance
        while keeping costs reasonable. Time is less critical in this case.
        </reasoning>

        <conclusion>
        Recommend Solution X with performance optimizations.
        </conclusion>

        Final answer: Implement Solution X with the following optimizations...
        """

        extracted = extract_multiple_xml_tags(
            complex_output,
            ["thinking", "analysis", "reasoning", "conclusion"],
        )

        assert extracted["thinking"] is not None
        assert "systematic analysis" in extracted["thinking"]

        assert extracted["analysis"] is not None
        assert "Factor 1" in extracted["analysis"]
        assert "Factor 2" in extracted["analysis"]
        assert "Factor 3" in extracted["analysis"]

        assert extracted["reasoning"] is not None
        assert "prioritize performance" in extracted["reasoning"]

        assert extracted["conclusion"] is not None
        assert "Solution X" in extracted["conclusion"]

    def test_clean_output_for_user(self) -> None:
        """Test cleaning agent output for user presentation.

        Text wrapped in the listed tags must disappear from the cleaned
        output, while the untagged, user-facing sentences must remain.
        """
        raw_output = """
        <thinking>
        Internal reasoning that user shouldn't see.
        This contains implementation details.
        </thinking>

        <debug>
        Debug information: variable X = 42
        </debug>

        Here's the answer to your question: The solution is to use method Y.

        <internal_notes>
        Remember to update the documentation later.
        </internal_notes>

        This approach will give you the best results.
        """

        clean_output = remove_xml_tags(
            raw_output,
            ["thinking", "debug", "internal_notes"],
        )

        # Content that lived inside the removed elements is gone ...
        assert "Internal reasoning" not in clean_output
        assert "Debug information" not in clean_output
        assert "update the documentation" not in clean_output
        # ... and everything outside them is preserved.
        assert "Here's the answer" in clean_output
        assert "method Y" in clean_output
        assert "best results" in clean_output

    def test_preserve_structured_content(self) -> None:
        """Test preserving structured content while removing tags.

        Only the ``<steps>`` markup itself must be stripped; the numbered
        list inside it and the trailing sentence must stay in the output.
        """
        structured_output = """
        <steps>
        1. Initialize the system
        2. Load the configuration
        3. Process the data
        4. Generate the report
        </steps>

        Follow these steps to complete the task.
        """

        clean_output = strip_xml_tags_keep_content(structured_output, ["steps"])

        # Look for the literal tag markup. The needle must be non-empty:
        # "" is a substring of every string, so `"" not in s` never holds.
        assert "<steps>" not in clean_output
        assert "</steps>" not in clean_output
        assert "1. Initialize" in clean_output
        assert "2. Load" in clean_output
        assert "3. Process" in clean_output
        assert "4. Generate" in clean_output
        assert "Follow these steps" in clean_output