Files
crewAI/lib/crewai/tests/llms/test_prompt_cache.py
Lorenze Jay 264da8245a
Some checks failed
CodeQL Advanced / Analyze (actions) (push) Has been cancelled
CodeQL Advanced / Analyze (python) (push) Has been cancelled
Check Documentation Broken Links / Check broken links (push) Has been cancelled
Vulnerability Scan / pip-audit (push) Has been cancelled
Nightly Canary Release / Check for new commits (push) Has been cancelled
Nightly Canary Release / Build nightly packages (push) Has been cancelled
Nightly Canary Release / Publish nightly to PyPI (push) Has been cancelled
Mark stale issues and pull requests / stale (push) Has been cancelled
Lorenze/imp/prompt layering (#5774)
* improving prompt structure especially for prompt caching

* addressing comments
2026-05-12 12:39:12 -07:00

197 lines
8.3 KiB
Python

"""Regression tests for the provider-agnostic prompt-cache breakpoint flag."""
from __future__ import annotations
from crewai.llms.cache import (
CACHE_BREAKPOINT_KEY,
mark_cache_breakpoint,
strip_cache_breakpoint,
)
from crewai.llms.providers.anthropic.completion import AnthropicCompletion
from crewai.llms.providers.openai.completion import OpenAICompletion
class TestCacheMarkerHelpers:
    """Direct checks of ``mark_cache_breakpoint`` and ``strip_cache_breakpoint``."""

    def test_mark_returns_new_dict(self) -> None:
        """Marking flags the returned message and leaves the input unflagged."""
        source_message = dict(role="user", content="hi")

        flagged = mark_cache_breakpoint(source_message)

        assert flagged[CACHE_BREAKPOINT_KEY] is True
        # Callers may hand in literal dicts and reuse them for later calls,
        # so the flag must never leak back into the dict that was passed in.
        assert CACHE_BREAKPOINT_KEY not in source_message

    def test_strip_is_idempotent(self) -> None:
        """Stripping twice in a row is safe and leaves the key absent."""
        message = {"role": "user", "content": "hi", CACHE_BREAKPOINT_KEY: True}

        # Pass 1 runs on a flagged dict, pass 2 on the already-stripped dict;
        # the key must be absent after each pass.
        for _ in range(2):
            strip_cache_breakpoint(message)
            assert CACHE_BREAKPOINT_KEY not in message
class TestBaseFormatDoesNotMutate:
    """Formatting must strip markers from its output only, never from the
    caller's message list.

    Executors reuse a single list across many LLM calls; if formatting
    erased the markers in place, caching would be lost on every iteration
    after the first.
    """

    def test_repeated_format_preserves_markers(self) -> None:
        """Format the same list twice; the output is clean and the input stays marked."""
        completion = OpenAICompletion(model="gpt-4o-mini")
        conversation = [
            mark_cache_breakpoint({"role": "system", "content": "stable system"}),
            mark_cache_breakpoint({"role": "user", "content": "stable user"}),
        ]

        # Round 1 proves the returned payload is stripped while the input is
        # untouched; round 2 proves the markers were still there to be seen.
        for _round in range(2):
            wire_payload = completion._format_messages(conversation)
            assert not any(CACHE_BREAKPOINT_KEY in entry for entry in wire_payload)
            for position in (0, 1):
                assert conversation[position][CACHE_BREAKPOINT_KEY] is True
class TestAnthropicCacheStamping:
    """Checks how ``_format_messages_for_anthropic`` translates breakpoint
    markers into ``cache_control`` entries on system and user content.
    """

    def test_stamps_system_with_cache_control(self) -> None:
        """Marked system and user messages both come back stamped ephemeral."""
        ephemeral = {"type": "ephemeral"}
        client = AnthropicCompletion(model="claude-sonnet-4-5")
        conversation = [
            mark_cache_breakpoint({"role": "system", "content": "you are helpful"}),
            mark_cache_breakpoint({"role": "user", "content": "ping"}),
        ]

        wire_messages, system_blocks = client._format_messages_for_anthropic(
            conversation
        )

        assert isinstance(system_blocks, list)
        leading_system = system_blocks[0]
        assert leading_system["cache_control"] == ephemeral
        assert leading_system["text"] == "you are helpful"
        # The last content block of the first formatted message is stamped too.
        trailing_block = wire_messages[0]["content"][-1]
        assert trailing_block["cache_control"] == ephemeral

    def test_stamps_stable_user_not_tool_result(self) -> None:
        """The stamp goes on the stable user prompt, not on the tool result.

        In a ReAct loop, tool results get flattened into a trailing user
        message. That trailing block is volatile and must stay unstamped;
        the original task prompt is the one that has to carry the stamp.
        """

        def user_message_has(message, block_matches) -> bool:
            """Return True for a user message whose list content contains
            at least one dict block accepted by ``block_matches``."""
            if message["role"] != "user":
                return False
            blocks = message["content"]
            if not isinstance(blocks, list):
                return False
            return any(
                isinstance(block, dict) and block_matches(block) for block in blocks
            )

        def is_stable_prompt(block) -> bool:
            return (
                block.get("type") == "text"
                and block.get("text") == "stable task prompt"
            )

        def is_tool_result(block) -> bool:
            return block.get("type") == "tool_result"

        client = AnthropicCompletion(model="claude-sonnet-4-5")
        assistant_tool_call = {
            "role": "assistant",
            "content": "",
            "tool_calls": [
                {
                    "id": "tc_1",
                    "function": {"name": "ping", "arguments": "{}"},
                }
            ],
        }
        tool_reply = {
            "role": "tool",
            "tool_call_id": "tc_1",
            "content": "volatile tool result",
        }
        conversation = [
            mark_cache_breakpoint({"role": "system", "content": "you are helpful"}),
            mark_cache_breakpoint({"role": "user", "content": "stable task prompt"}),
            assistant_tool_call,
            tool_reply,
        ]

        wire_messages, _system = client._format_messages_for_anthropic(conversation)

        # Locate the formatted user message that carries the stable prompt and
        # check the first text block inside it.
        stable_message = next(
            message
            for message in wire_messages
            if user_message_has(message, is_stable_prompt)
        )
        stamped_text = next(
            block
            for block in stable_message["content"]
            if isinstance(block, dict) and block.get("type") == "text"
        )
        assert stamped_text.get("cache_control") == {"type": "ephemeral"}

        # The user message that carries the tool_result must have no stamp on
        # any of its blocks.
        tool_message = next(
            message
            for message in wire_messages
            if user_message_has(message, is_tool_result)
        )
        for block in tool_message["content"]:
            assert "cache_control" not in block

    def test_assistant_marker_is_ignored(self) -> None:
        """A marker on an assistant message must not produce a user stamp.

        After Anthropic's role coalescing an assistant marker has no stable
        stamp target, so it should be silently ignored rather than collected
        and later dropped on a mismatch.
        """
        client = AnthropicCompletion(model="claude-sonnet-4-5")
        conversation = [
            mark_cache_breakpoint({"role": "system", "content": "you are helpful"}),
            mark_cache_breakpoint(
                {"role": "assistant", "content": "I will help you out."}
            ),
            {"role": "user", "content": "ping"},
        ]

        wire_messages, system_blocks = client._format_messages_for_anthropic(
            conversation
        )

        # The marked system prompt is still returned in block (list) form.
        assert isinstance(system_blocks, list)

        # No user message was marked, so no dict block of any user message
        # may carry cache_control. Generators keep the checks lazy, one
        # message at a time.
        user_bodies = (
            message.get("content")
            for message in wire_messages
            if message.get("role") == "user"
        )
        for body in user_bodies:
            if not isinstance(body, list):
                continue
            for block in (item for item in body if isinstance(item, dict)):
                assert "cache_control" not in block

    def test_list_content_user_marker_matches(self) -> None:
        """A marked user message already in single-text-block list form must
        still be matched to its formatted counterpart and stamped.
        """
        client = AnthropicCompletion(model="claude-sonnet-4-5")
        preformatted_user = {
            "role": "user",
            "content": [{"type": "text", "text": "stable list prompt"}],
        }
        conversation = [mark_cache_breakpoint(preformatted_user)]

        wire_messages, _system = client._format_messages_for_anthropic(conversation)

        first_user = next(
            filter(lambda message: message["role"] == "user", wire_messages)
        )
        blocks = first_user["content"]
        assert isinstance(blocks, list)
        first_text = next(
            block
            for block in blocks
            if isinstance(block, dict) and block.get("type") == "text"
        )
        assert first_text.get("cache_control") == {"type": "ephemeral"}

    def test_unmarked_messages_get_no_cache_control(self) -> None:
        """Without any marker, nothing in the output refers to cache_control."""
        client = AnthropicCompletion(model="claude-sonnet-4-5")
        conversation = [
            {"role": "system", "content": "no caching here"},
            {"role": "user", "content": "no caching here either"},
        ]

        wire_messages, system_prompt = client._format_messages_for_anthropic(
            conversation
        )

        # With no marker the system prompt stays a plain string and is not
        # converted to content blocks.
        assert isinstance(system_prompt, str)

        # With no marker there is no cache_control in any list-form content.
        bodies = (message.get("content") for message in wire_messages)
        for body in bodies:
            if not isinstance(body, list):
                continue
            for block in body:
                assert "cache_control" not in block
class TestNonAnthropicStripsMarker:
    """The internal breakpoint flag must not appear in formatted OpenAI messages."""

    def test_openai_format_strips_marker_from_wire_payload(self) -> None:
        """Every message returned by ``_format_messages`` lacks the marker key."""
        completion = OpenAICompletion(model="gpt-4o-mini")
        conversation = [
            mark_cache_breakpoint({"role": "system", "content": "stable"}),
            mark_cache_breakpoint({"role": "user", "content": "hi"}),
        ]

        wire_payload = completion._format_messages(conversation)

        # Stops at the first offending message, exactly like an assert loop.
        assert not any(CACHE_BREAKPOINT_KEY in entry for entry in wire_payload)