mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-25 08:08:14 +00:00
Implement LLM generations, logprobs, and XML parsing features
- Add support for n generations and logprobs parameters in the LLM class
- Extend the Agent class to accept LLM generation parameters (llm_n, llm_logprobs, llm_top_logprobs)
- Add a return_full_completion parameter to access complete LLM response metadata
- Implement an XML parser utility for extracting content from tags like <thinking>
- Add completion metadata support to the TaskOutput and LiteAgentOutput classes
- Add comprehensive tests and examples demonstrating the new functionality
- Maintain full backward compatibility with existing code

Addresses issue #3052: How to obtain n generations or generations in different tags

Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
158
tests/test_integration_llm_features.py
Normal file
158
tests/test_integration_llm_features.py
Normal file
@@ -0,0 +1,158 @@
|
||||
import pytest
|
||||
from unittest.mock import Mock, patch
|
||||
from crewai import Agent, Task, Crew, LLM
|
||||
from crewai.lite_agent import LiteAgent
|
||||
from crewai.utilities.xml_parser import extract_xml_content
|
||||
|
||||
|
||||
class TestIntegrationLLMFeatures:
    """Integration tests for LLM features with agents and tasks.

    Tests that construct LLM-backed objects patch
    ``crewai.llm.litellm.completion`` with a mock whose return value is built
    by :meth:`_make_completion_response`.
    """

    @staticmethod
    def _make_completion_response(*contents, prompt_tokens, completion_tokens):
        """Build a mock completion response.

        Args:
            *contents: Message text for each choice; one choice is created
                per entry, in order.
            prompt_tokens: Value stored under ``usage["prompt_tokens"]``.
            completion_tokens: Value stored under
                ``usage["completion_tokens"]``.

        Returns:
            A ``Mock`` carrying ``choices``, ``usage``, ``model``,
            ``created``, ``id``, ``object`` and ``system_fingerprint``.
        """
        response = Mock()
        response.choices = [Mock(message=Mock(content=text)) for text in contents]
        response.usage = {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
        }
        response.model = "gpt-3.5-turbo"
        response.created = 1234567890
        response.id = "test-id"
        response.object = "chat.completion"
        response.system_fingerprint = "test-fingerprint"
        return response

    @patch('crewai.llm.litellm.completion')
    def test_agent_with_multiple_generations(self, mock_completion):
        """Test agent execution with multiple generations."""
        mock_completion.return_value = self._make_completion_response(
            "Generation 1",
            "Generation 2",
            "Generation 3",
            prompt_tokens=20,
            completion_tokens=30,
        )

        llm = LLM(model="gpt-3.5-turbo", n=3, return_full_completion=True)
        agent = Agent(
            role="writer",
            goal="write content",
            backstory="You are a writer",
            llm=llm,
            return_completion_metadata=True,
        )

        task = Task(
            description="Write a short story",
            agent=agent,
            expected_output="A short story",
        )

        # The executor is replaced, so the value returned here is whatever the
        # agent derives from the canned ``{"output": ...}`` payload.
        with patch.object(agent, 'agent_executor') as mock_executor:
            mock_executor.invoke.return_value = {"output": "Generation 1"}

            result = agent.execute_task(task)
            assert result == "Generation 1"

    @patch('crewai.llm.litellm.completion')
    def test_lite_agent_with_xml_extraction(self, mock_completion):
        """Test LiteAgent with XML content extraction."""
        response_with_xml = """
        <thinking>
        I need to analyze this problem step by step.
        First, I'll consider the requirements.
        </thinking>

        Based on my analysis, here's the solution: The answer is 42.
        """

        mock_completion.return_value = self._make_completion_response(
            response_with_xml,
            prompt_tokens=15,
            completion_tokens=25,
        )

        lite_agent = LiteAgent(
            role="analyst",
            goal="analyze problems",
            backstory="You are an analyst",
            llm=LLM(model="gpt-3.5-turbo", return_full_completion=True),
        )

        with patch.object(lite_agent, '_invoke_loop') as mock_invoke:
            mock_invoke.return_value = response_with_xml

            result = lite_agent.kickoff("Analyze this problem")

            thinking_content = extract_xml_content(result.raw, "thinking")
            assert thinking_content is not None
            assert "step by step" in thinking_content
            assert "requirements" in thinking_content

    def test_xml_parser_with_complex_agent_output(self):
        """Test XML parser with complex agent output containing multiple tags."""
        complex_output = """
        <thinking>
        This is a complex problem that requires careful analysis.
        I need to break it down into steps.
        </thinking>

        <reasoning>
        Step 1: Understand the requirements
        Step 2: Analyze the constraints
        Step 3: Develop a solution
        </reasoning>

        <conclusion>
        The best approach is to use a systematic methodology.
        </conclusion>

        Final answer: Use the systematic approach outlined above.
        """

        thinking = extract_xml_content(complex_output, "thinking")
        reasoning = extract_xml_content(complex_output, "reasoning")
        conclusion = extract_xml_content(complex_output, "conclusion")

        assert thinking is not None
        assert "complex problem" in thinking
        assert reasoning is not None
        assert "Step 1" in reasoning
        assert "Step 2" in reasoning
        assert "Step 3" in reasoning
        assert conclusion is not None
        assert "systematic methodology" in conclusion

    @patch('crewai.llm.litellm.completion')
    def test_crew_with_llm_parameters(self, mock_completion):
        """Test crew execution with LLM parameters."""
        mock_response = self._make_completion_response(
            "Test response",
            prompt_tokens=10,
            completion_tokens=5,
        )
        mock_completion.return_value = mock_response

        agent = Agent(
            role="analyst",
            goal="analyze data",
            backstory="You are an analyst",
            llm_n=2,
            llm_logprobs=5,
            return_completion_metadata=True,
        )

        # The only non-mocked behaviour in this test: the Agent must retain
        # the generation parameters it was constructed with.
        assert agent.llm_n == 2
        assert agent.llm_logprobs == 5
        assert agent.return_completion_metadata is True

        task = Task(
            description="Analyze the data",
            agent=agent,
            expected_output="Analysis results",
        )

        crew = Crew(agents=[agent], tasks=[task])

        # ``kickoff`` itself is mocked, so the checks below verify the wiring
        # of the canned output rather than real crew execution.
        with patch.object(crew, 'kickoff') as mock_kickoff:
            mock_output = Mock()
            mock_output.tasks_output = [
                Mock(completion_metadata={"choices": mock_response.choices})
            ]
            mock_kickoff.return_value = mock_output

            result = crew.kickoff()
            assert result is not None
            mock_kickoff.assert_called_once_with()
            assert result is mock_output
            metadata = result.tasks_output[0].completion_metadata
            assert metadata["choices"] == mock_response.choices
|
||||
227
tests/test_llm_generations_logprobs.py
Normal file
227
tests/test_llm_generations_logprobs.py
Normal file
@@ -0,0 +1,227 @@
|
||||
import pytest
|
||||
from unittest.mock import Mock, patch
|
||||
from crewai import Agent, Task, LLM
|
||||
from crewai.tasks.task_output import TaskOutput
|
||||
from crewai.lite_agent import LiteAgent, LiteAgentOutput
|
||||
from crewai.utilities.xml_parser import (
|
||||
extract_xml_content,
|
||||
extract_all_xml_content,
|
||||
extract_multiple_xml_tags,
|
||||
extract_multiple_xml_tags_all,
|
||||
extract_xml_with_attributes,
|
||||
remove_xml_tags,
|
||||
strip_xml_tags_keep_content,
|
||||
)
|
||||
|
||||
|
||||
class TestLLMGenerationsLogprobs:
    """Test suite for LLM generations and logprobs functionality."""

    @staticmethod
    def _two_generation_metadata():
        """Return ``(completion_metadata, usage)`` describing two generations."""
        usage = {"prompt_tokens": 10, "completion_tokens": 15}
        choices = [
            Mock(message=Mock(content="Generation 1")),
            Mock(message=Mock(content="Generation 2")),
        ]
        metadata = {
            "choices": choices,
            "usage": usage,
            "model": "gpt-3.5-turbo",
        }
        return metadata, usage

    def test_llm_with_n_parameter(self):
        """The ``n`` argument is exposed on the LLM instance."""
        requested = 3
        model = LLM(model="gpt-3.5-turbo", n=requested)
        assert model.n == requested

    def test_llm_with_logprobs_parameter(self):
        """The ``logprobs`` argument is exposed on the LLM instance."""
        requested = 5
        model = LLM(model="gpt-3.5-turbo", logprobs=requested)
        assert model.logprobs == requested

    def test_llm_with_return_full_completion(self):
        """The ``return_full_completion`` flag is exposed on the LLM instance."""
        model = LLM(model="gpt-3.5-turbo", return_full_completion=True)
        assert model.return_full_completion is True

    def test_agent_with_llm_parameters(self):
        """Agent keeps the LLM generation parameters it was built with."""
        numeric_params = {
            "llm_n": 3,
            "llm_logprobs": 5,
            "llm_top_logprobs": 3,
        }
        agent = Agent(
            role="test",
            goal="test",
            backstory="test",
            return_completion_metadata=True,
            **numeric_params,
        )

        for attr_name, expected in numeric_params.items():
            assert getattr(agent, attr_name) == expected
        assert agent.return_completion_metadata is True

    @patch('crewai.llm.litellm.completion')
    def test_llm_call_returns_full_completion(self, mock_completion):
        """``LLM.call`` returns a dict when full completion is requested."""
        only_choice = Mock()
        only_choice.message.content = "Test response"

        # Keyword arguments to ``Mock`` are set as attributes on the mock.
        mock_completion.return_value = Mock(
            choices=[only_choice],
            usage={"prompt_tokens": 10, "completion_tokens": 5},
            model="gpt-3.5-turbo",
            created=1234567890,
            id="test-id",
            object="chat.completion",
            system_fingerprint="test-fingerprint",
        )

        model = LLM(model="gpt-3.5-turbo", return_full_completion=True)
        completion = model.call("Test message")

        assert isinstance(completion, dict)
        assert completion["content"] == "Test response"
        for required_key in ("choices", "usage"):
            assert required_key in completion
        assert completion["model"] == "gpt-3.5-turbo"

    def test_task_output_completion_metadata(self):
        """TaskOutput exposes generations and usage from completion metadata."""
        metadata, expected_usage = self._two_generation_metadata()

        task_output = TaskOutput(
            description="Test task",
            raw="Generation 1",
            agent="test-agent",
            completion_metadata=metadata,
        )

        assert task_output.get_generations() == ["Generation 1", "Generation 2"]
        assert task_output.get_usage_metrics() == expected_usage

    def test_lite_agent_output_completion_metadata(self):
        """LiteAgentOutput exposes generations and usage from completion metadata."""
        metadata, expected_usage = self._two_generation_metadata()

        agent_output = LiteAgentOutput(
            raw="Generation 1",
            agent_role="test-agent",
            completion_metadata=metadata,
        )

        assert agent_output.get_generations() == ["Generation 1", "Generation 2"]
        assert agent_output.get_usage_metrics_from_completion() == expected_usage
|
||||
|
||||
|
||||
class TestXMLParser:
    """Test suite for XML parsing functionality.

    Tests that apply ``in`` to an extraction result first assert the result
    is not ``None``, so a missing match is reported as an assertion failure
    rather than a ``TypeError``.
    """

    def test_extract_xml_content_basic(self):
        """Test basic XML content extraction."""
        text = "Some text <thinking>This is my thought</thinking> more text"
        result = extract_xml_content(text, "thinking")
        assert result == "This is my thought"

    def test_extract_xml_content_not_found(self):
        """Test XML content extraction when tag not found."""
        text = "Some text without the tag"
        result = extract_xml_content(text, "thinking")
        assert result is None

    def test_extract_xml_content_multiline(self):
        """Test XML content extraction with multiline content."""
        text = """Some text
        <thinking>
        This is a multiline
        thought process
        </thinking>
        more text"""
        result = extract_xml_content(text, "thinking")
        assert result is not None
        assert "multiline" in result
        assert "thought process" in result

    def test_extract_all_xml_content(self):
        """Test extracting all occurrences of XML content."""
        text = """
        <thinking>First thought</thinking>
        Some text
        <thinking>Second thought</thinking>
        """
        result = extract_all_xml_content(text, "thinking")
        assert len(result) == 2
        assert result[0] == "First thought"
        assert result[1] == "Second thought"

    def test_extract_multiple_xml_tags(self):
        """Test extracting multiple different XML tags."""
        text = """
        <thinking>My thoughts</thinking>
        <reasoning>My reasoning</reasoning>
        <conclusion>My conclusion</conclusion>
        """
        result = extract_multiple_xml_tags(text, ["thinking", "reasoning", "conclusion"])
        assert result["thinking"] == "My thoughts"
        assert result["reasoning"] == "My reasoning"
        assert result["conclusion"] == "My conclusion"

    def test_extract_multiple_xml_tags_all(self):
        """Test extracting all occurrences of multiple XML tags."""
        text = """
        <thinking>First thought</thinking>
        <reasoning>First reasoning</reasoning>
        <thinking>Second thought</thinking>
        """
        result = extract_multiple_xml_tags_all(text, ["thinking", "reasoning"])
        assert len(result["thinking"]) == 2
        assert len(result["reasoning"]) == 1
        assert result["thinking"][0] == "First thought"
        assert result["thinking"][1] == "Second thought"

    def test_extract_xml_with_attributes(self):
        """Test extracting XML with attributes."""
        text = '<thinking type="deep" level="2">Complex thought</thinking>'
        result = extract_xml_with_attributes(text, "thinking")
        assert len(result) == 1
        assert result[0]["content"] == "Complex thought"
        assert result[0]["attributes"]["type"] == "deep"
        assert result[0]["attributes"]["level"] == "2"

    def test_remove_xml_tags(self):
        """Test removing XML tags and their content."""
        text = "Keep this <thinking>Remove this</thinking> and this"
        result = remove_xml_tags(text, ["thinking"])
        assert result == "Keep this and this"

    def test_strip_xml_tags_keep_content(self):
        """Test stripping XML tags but keeping content."""
        text = "Keep this <thinking>Keep this too</thinking> and this"
        result = strip_xml_tags_keep_content(text, ["thinking"])
        assert result == "Keep this Keep this too and this"

    def test_nested_xml_tags(self):
        """Test handling of nested XML tags."""
        text = "<outer>Before <inner>nested content</inner> after</outer>"
        result = extract_xml_content(text, "outer")
        assert result is not None
        assert "Before" in result
        assert "nested content" in result
        assert "after" in result

    def test_xml_with_special_characters(self):
        """Test XML parsing with special characters."""
        text = "<thinking>Content with & < > \" ' characters</thinking>"
        result = extract_xml_content(text, "thinking")
        assert result is not None
        assert "&" in result
        assert "<" in result
        assert ">" in result
|
||||
162
tests/test_xml_parser_examples.py
Normal file
162
tests/test_xml_parser_examples.py
Normal file
@@ -0,0 +1,162 @@
|
||||
import pytest
|
||||
from crewai.utilities.xml_parser import (
|
||||
extract_xml_content,
|
||||
extract_all_xml_content,
|
||||
extract_multiple_xml_tags,
|
||||
remove_xml_tags,
|
||||
strip_xml_tags_keep_content,
|
||||
)
|
||||
|
||||
|
||||
class TestXMLParserExamples:
    """Exercise the XML parser helpers against realistic agent transcripts."""

    def test_agent_thinking_extraction(self):
        """A single <thinking> section is pulled out of surrounding prose."""
        agent_output = """
        I need to solve this problem step by step.

        <thinking>
        Let me break this down:
        1. First, I need to understand the requirements
        2. Then, I'll analyze the constraints
        3. Finally, I'll propose a solution

        The key insight is that we need to balance efficiency with accuracy.
        </thinking>

        Based on my analysis, here's my recommendation: Use approach A.
        """

        thinking = extract_xml_content(agent_output, "thinking")
        assert thinking is not None

        expected_fragments = (
            "break this down",
            "requirements",
            "constraints",
            "efficiency with accuracy",
        )
        for fragment in expected_fragments:
            assert fragment in thinking

    def test_multiple_reasoning_tags(self):
        """Every <reasoning> section is returned, in document order."""
        agent_output = """
        <reasoning>
        Initial analysis shows three possible approaches.
        </reasoning>

        Let me explore each option:

        <reasoning>
        Option A: Fast but less accurate
        Option B: Slow but very accurate
        Option C: Balanced approach
        </reasoning>

        My final recommendation is Option C.
        """

        sections = extract_all_xml_content(agent_output, "reasoning")
        assert len(sections) == 2

        overview, options = sections[0], sections[1]
        assert "three possible approaches" in overview
        for label in ("Option A", "Option B", "Option C"):
            assert label in options

    def test_complex_agent_workflow(self):
        """Several distinct tag types are extracted in a single call."""
        complex_output = """
        <thinking>
        This is a complex problem requiring systematic analysis.
        I need to consider multiple factors.
        </thinking>

        <analysis>
        Factor 1: Performance requirements
        Factor 2: Cost constraints
        Factor 3: Time limitations
        </analysis>

        <reasoning>
        Given the analysis above, I believe we should prioritize performance
        while keeping costs reasonable. Time is less critical in this case.
        </reasoning>

        <conclusion>
        Recommend Solution X with performance optimizations.
        </conclusion>

        Final answer: Implement Solution X with the following optimizations...
        """

        tag_names = ["thinking", "analysis", "reasoning", "conclusion"]
        extracted = extract_multiple_xml_tags(complex_output, tag_names)

        # Each entry pairs a tag with the fragments its content must contain;
        # the tag is checked for presence before its fragments are searched.
        expectations = (
            ("thinking", ("systematic analysis",)),
            ("analysis", ("Factor 1", "Factor 2", "Factor 3")),
            ("reasoning", ("prioritize performance",)),
            ("conclusion", ("Solution X",)),
        )
        for tag, fragments in expectations:
            assert extracted[tag] is not None
            for fragment in fragments:
                assert fragment in extracted[tag]

    def test_clean_output_for_user(self):
        """Internal-only sections are dropped while user-facing text remains."""
        raw_output = """
        <thinking>
        Internal reasoning that user shouldn't see.
        This contains implementation details.
        </thinking>

        <debug>
        Debug information: variable X = 42
        </debug>

        Here's the answer to your question: The solution is to use method Y.

        <internal_notes>
        Remember to update the documentation later.
        </internal_notes>

        This approach will give you the best results.
        """

        hidden_tags = ["thinking", "debug", "internal_notes"]
        clean_output = remove_xml_tags(raw_output, hidden_tags)

        removed = ("Internal reasoning", "Debug information", "update the documentation")
        for fragment in removed:
            assert fragment not in clean_output

        retained = ("Here's the answer", "method Y", "best results")
        for fragment in retained:
            assert fragment in clean_output

    def test_preserve_structured_content(self):
        """Tag markers are stripped but the text they wrapped is kept."""
        structured_output = """
        <steps>
        1. Initialize the system
        2. Load the configuration
        3. Process the data
        4. Generate the report
        </steps>

        Follow these steps to complete the task.
        """

        clean_output = strip_xml_tags_keep_content(structured_output, ["steps"])

        for marker in ("<steps>", "</steps>"):
            assert marker not in clean_output

        kept = (
            "1. Initialize",
            "2. Load",
            "3. Process",
            "4. Generate",
            "Follow these steps",
        )
        for fragment in kept:
            assert fragment in clean_output
|
||||
Reference in New Issue
Block a user