mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-04-10 04:52:40 +00:00
Some checks failed
* imp compaction * fix lint * cassette gen * cassette gen * improve assert * adding azure * fix global docstring
285 lines
9.0 KiB
Python
285 lines
9.0 KiB
Python
"""
|
|
Integration tests for structured context compaction (summarize_messages).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from typing import Any
|
|
from unittest.mock import MagicMock
|
|
|
|
import pytest
|
|
|
|
from crewai.agent import Agent
|
|
from crewai.crew import Crew
|
|
from crewai.llm import LLM
|
|
from crewai.task import Task
|
|
from crewai.utilities.agent_utils import summarize_messages
|
|
from crewai.utilities.i18n import I18N
|
|
|
|
|
|
def _build_conversation_messages(
|
|
*, include_system: bool = True, include_files: bool = False
|
|
) -> list[dict[str, Any]]:
|
|
"""Build a realistic multi-turn conversation for summarization tests."""
|
|
messages: list[dict[str, Any]] = []
|
|
|
|
if include_system:
|
|
messages.append(
|
|
{
|
|
"role": "system",
|
|
"content": (
|
|
"You are a research assistant specializing in AI topics. "
|
|
"Your goal is to find accurate, up-to-date information."
|
|
),
|
|
}
|
|
)
|
|
|
|
user_msg: dict[str, Any] = {
|
|
"role": "user",
|
|
"content": (
|
|
"Research the latest developments in large language models. "
|
|
"Focus on architecture improvements and training techniques."
|
|
),
|
|
}
|
|
if include_files:
|
|
user_msg["files"] = {"reference.pdf": MagicMock()}
|
|
messages.append(user_msg)
|
|
|
|
messages.append(
|
|
{
|
|
"role": "assistant",
|
|
"content": (
|
|
"I'll research the latest developments in large language models. "
|
|
"Based on my knowledge, recent advances include:\n"
|
|
"1. Mixture of Experts (MoE) architectures\n"
|
|
"2. Improved attention mechanisms like Flash Attention\n"
|
|
"3. Better training data curation techniques\n"
|
|
"4. Constitutional AI and RLHF improvements"
|
|
),
|
|
}
|
|
)
|
|
|
|
messages.append(
|
|
{
|
|
"role": "user",
|
|
"content": "Can you go deeper on the MoE architectures? What are the key papers?",
|
|
}
|
|
)
|
|
|
|
messages.append(
|
|
{
|
|
"role": "assistant",
|
|
"content": (
|
|
"Key papers on Mixture of Experts:\n"
|
|
"- Switch Transformers (Google, 2021) - simplified MoE routing\n"
|
|
"- GShard - scaling to 600B parameters\n"
|
|
"- Mixtral (Mistral AI) - open-source MoE model\n"
|
|
"The main advantage is computational efficiency: "
|
|
"only a subset of experts is activated per token."
|
|
),
|
|
}
|
|
)
|
|
|
|
return messages
|
|
|
|
|
|
class TestSummarizeDirectOpenAI:
    """Exercise summarize_messages directly against an OpenAI model."""

    @pytest.mark.vcr()
    def test_summarize_direct_openai(self) -> None:
        """Summarizing with gpt-4o-mini keeps the system message intact."""
        model = LLM(model="gpt-4o-mini", temperature=0)
        translations = I18N()
        conversation = _build_conversation_messages(include_system=True)

        # Snapshot taken before the call so it can be compared afterwards.
        system_text_before = conversation[0]["content"]

        # The return value is not used; the list itself is inspected below.
        summarize_messages(
            messages=conversation,
            llm=model,
            callbacks=[],
            i18n=translations,
        )

        # The leading system message must come through untouched.
        assert len(conversation) >= 2
        leading = conversation[0]
        assert leading["role"] == "system"
        assert leading["content"] == system_text_before

        # The final entry is the summary: a non-empty user message that
        # contains both the opening and closing <summary> tags.
        trailing = conversation[-1]
        assert trailing["role"] == "user"
        summary_text = trailing["content"]
        assert len(summary_text) > 0
        for tag in ("<summary>", "</summary>"):
            assert tag in summary_text
|
|
|
|
|
|
class TestSummarizeDirectAnthropic:
    """Test direct summarize_messages calls with Anthropic."""

    @pytest.mark.vcr()
    def test_summarize_direct_anthropic(self) -> None:
        """Test summarize_messages with claude-3-5-haiku preserves system messages.

        Checks that, after the call, the message list still starts with the
        unchanged system message and ends with a non-empty user message
        containing a ``<summary>`` block.
        """
        llm = LLM(model="anthropic/claude-3-5-haiku-latest", temperature=0)
        i18n = I18N()
        messages = _build_conversation_messages(include_system=True)

        # Captured up front so the post-call comparison checks the content,
        # not just the role, of the system message.
        original_system_content = messages[0]["content"]

        # The return value is not used; the list itself is inspected below.
        summarize_messages(
            messages=messages,
            llm=llm,
            callbacks=[],
            i18n=i18n,
        )

        # System message should be preserved
        assert len(messages) >= 2
        assert messages[0]["role"] == "system"
        assert messages[0]["content"] == original_system_content

        # Summary should be a user message with <summary> block
        summary_msg = messages[-1]
        assert summary_msg["role"] == "user"
        assert len(summary_msg["content"]) > 0
        assert "<summary>" in summary_msg["content"]
        assert "</summary>" in summary_msg["content"]
|
|
|
|
|
|
class TestSummarizeDirectGemini:
    """Test direct summarize_messages calls with Gemini."""

    @pytest.mark.vcr()
    def test_summarize_direct_gemini(self) -> None:
        """Test summarize_messages with gemini-2.0-flash preserves system messages.

        Checks that, after the call, the message list still starts with the
        unchanged system message and ends with a non-empty user message
        containing a ``<summary>`` block.
        """
        llm = LLM(model="gemini/gemini-2.0-flash", temperature=0)
        i18n = I18N()
        messages = _build_conversation_messages(include_system=True)

        # Captured up front so the post-call comparison checks the content,
        # not just the role, of the system message.
        original_system_content = messages[0]["content"]

        # The return value is not used; the list itself is inspected below.
        summarize_messages(
            messages=messages,
            llm=llm,
            callbacks=[],
            i18n=i18n,
        )

        # System message should be preserved
        assert len(messages) >= 2
        assert messages[0]["role"] == "system"
        assert messages[0]["content"] == original_system_content

        # Summary should be a user message with <summary> block
        summary_msg = messages[-1]
        assert summary_msg["role"] == "user"
        assert len(summary_msg["content"]) > 0
        assert "<summary>" in summary_msg["content"]
        assert "</summary>" in summary_msg["content"]
|
|
|
|
|
|
class TestSummarizeDirectAzure:
    """Test direct summarize_messages calls with Azure."""

    @pytest.mark.vcr()
    def test_summarize_direct_azure(self) -> None:
        """Test summarize_messages with azure/gpt-4o-mini preserves system messages.

        Checks that, after the call, the message list still starts with the
        unchanged system message and ends with a non-empty user message
        containing a ``<summary>`` block.
        """
        llm = LLM(model="azure/gpt-4o-mini", temperature=0)
        i18n = I18N()
        messages = _build_conversation_messages(include_system=True)

        # Captured up front so the post-call comparison checks the content,
        # not just the role, of the system message.
        original_system_content = messages[0]["content"]

        # The return value is not used; the list itself is inspected below.
        summarize_messages(
            messages=messages,
            llm=llm,
            callbacks=[],
            i18n=i18n,
        )

        # System message should be preserved
        assert len(messages) >= 2
        assert messages[0]["role"] == "system"
        assert messages[0]["content"] == original_system_content

        # Summary should be a user message with <summary> block
        summary_msg = messages[-1]
        assert summary_msg["role"] == "user"
        assert len(summary_msg["content"]) > 0
        assert "<summary>" in summary_msg["content"]
        assert "</summary>" in summary_msg["content"]
|
|
|
|
|
|
class TestCrewKickoffCompaction:
    """Run Crew.kickoff() with a deliberately tiny context window."""

    @pytest.mark.vcr()
    def test_crew_kickoff_compaction_openai(self) -> None:
        """Kickoff completes when the LLM's context_window_size is very small."""
        small_window_llm = LLM(model="gpt-4o-mini", temperature=0)
        # A very small window is set to push the run towards compaction.
        small_window_llm.context_window_size = 500

        researcher = Agent(
            role="Researcher",
            goal="Find information about Python programming",
            backstory="You are an expert researcher.",
            llm=small_window_llm,
            verbose=False,
            max_iter=2,
        )
        question = Task(
            description="What is Python? Give a brief answer.",
            expected_output="A short description of Python.",
            agent=researcher,
        )
        team = Crew(agents=[researcher], tasks=[question], verbose=False)

        # Whether compaction actually fires depends on the real response
        # sizes, so this only checks that the code path runs without crashing.
        outcome = team.kickoff()
        assert outcome is not None
|
|
|
|
|
|
class TestAgentExecuteTaskCompaction:
    """Run Agent.execute_task() with a deliberately tiny context window."""

    @pytest.mark.vcr()
    def test_agent_execute_task_compaction(self) -> None:
        """Agent.execute_task() returns a result with a small context_window_size."""
        small_window_llm = LLM(model="gpt-4o-mini", temperature=0)
        small_window_llm.context_window_size = 500

        writer = Agent(
            role="Writer",
            goal="Write concise content",
            backstory="You are a skilled writer.",
            llm=small_window_llm,
            verbose=False,
            max_iter=2,
        )
        assignment = Task(
            description="Write one sentence about the sun.",
            expected_output="A single sentence about the sun.",
            agent=writer,
        )

        # Only the presence of a result is checked, not its content.
        outcome = writer.execute_task(task=assignment)
        assert outcome is not None
|
|
|
|
|
|
class TestSummarizePreservesFiles:
    """Check that attached files are carried through a real summarization."""

    @pytest.mark.vcr()
    def test_summarize_preserves_files_integration(self) -> None:
        """The file reference ends up on the summary message after the call."""
        model = LLM(model="gpt-4o-mini", temperature=0)
        translations = I18N()
        conversation = _build_conversation_messages(
            include_system=True,
            include_files=True,
        )

        # The return value is not used; the list itself is inspected below.
        summarize_messages(
            messages=conversation,
            llm=model,
            callbacks=[],
            i18n=translations,
        )

        # The system message still leads the list.
        assert conversation[0]["role"] == "system"

        # The last message holds the <summary> block and the attachment.
        trailing = conversation[-1]
        for tag in ("<summary>", "</summary>"):
            assert tag in trailing["content"]
        assert "files" in trailing
        assert "reference.pdf" in trailing["files"]
|