Files
crewAI/lib/crewai/tests/llms/test_prompt_cache.py
Devin AI d00f7a7960 fix: strip cache_breakpoint from LiteLLM path for non-Anthropic providers
The LiteLLM-based LLM.call() (used by Groq, OpenAI-compatible, etc.)
does not go through BaseLLM._format_messages() which strips the
cache_breakpoint flag. This causes providers like Groq to reject
messages with 'cache_breakpoint' as an unsupported property.

Strip the flag in _prepare_completion_params() so it is removed from
the wire payload for all providers using the LiteLLM code path.

Fixes #5886

Co-Authored-By: João <joao@crewai.com>
2026-05-21 10:10:46 +00:00

253 lines
11 KiB
Python

"""Regression tests for the provider-agnostic prompt-cache breakpoint flag."""
from __future__ import annotations
from crewai.llms.cache import (
CACHE_BREAKPOINT_KEY,
mark_cache_breakpoint,
strip_cache_breakpoint,
)
from crewai.llms.providers.anthropic.completion import AnthropicCompletion
from crewai.llms.providers.openai.completion import OpenAICompletion
class TestCacheMarkerHelpers:
def test_mark_returns_new_dict(self) -> None:
original = {"role": "user", "content": "hi"}
marked = mark_cache_breakpoint(original)
assert marked[CACHE_BREAKPOINT_KEY] is True
# Marker must NOT bleed back into the caller's dict — callers may
# pass literal dicts and reuse them across calls.
assert CACHE_BREAKPOINT_KEY not in original
def test_strip_is_idempotent(self) -> None:
msg = {"role": "user", "content": "hi", CACHE_BREAKPOINT_KEY: True}
strip_cache_breakpoint(msg)
assert CACHE_BREAKPOINT_KEY not in msg
strip_cache_breakpoint(msg)
assert CACHE_BREAKPOINT_KEY not in msg
class TestBaseFormatDoesNotMutate:
"""The strip-on-format pass must not erase markers from the caller's
messages list — executors reuse a single list across many LLM calls,
and mutating it would defeat caching on every iteration after the first.
"""
def test_repeated_format_preserves_markers(self) -> None:
llm = OpenAICompletion(model="gpt-4o-mini")
messages = [
mark_cache_breakpoint({"role": "system", "content": "stable system"}),
mark_cache_breakpoint({"role": "user", "content": "stable user"}),
]
# First call: provider strips markers from the returned (copied) list
first = llm._format_messages(messages)
assert all(CACHE_BREAKPOINT_KEY not in m for m in first)
# Original list must STILL carry the markers
assert messages[0][CACHE_BREAKPOINT_KEY] is True
assert messages[1][CACHE_BREAKPOINT_KEY] is True
# Second call from the same list still sees the markers
second = llm._format_messages(messages)
assert all(CACHE_BREAKPOINT_KEY not in m for m in second)
assert messages[0][CACHE_BREAKPOINT_KEY] is True
assert messages[1][CACHE_BREAKPOINT_KEY] is True
class TestAnthropicCacheStamping:
def test_stamps_system_with_cache_control(self) -> None:
llm = AnthropicCompletion(model="claude-sonnet-4-5")
messages = [
mark_cache_breakpoint({"role": "system", "content": "you are helpful"}),
mark_cache_breakpoint({"role": "user", "content": "ping"}),
]
formatted, system = llm._format_messages_for_anthropic(messages)
assert isinstance(system, list)
assert system[0]["cache_control"] == {"type": "ephemeral"}
assert system[0]["text"] == "you are helpful"
# First user block carries cache_control too
last_block = formatted[0]["content"][-1]
assert last_block["cache_control"] == {"type": "ephemeral"}
def test_stamps_stable_user_not_tool_result(self) -> None:
"""Within a ReAct loop, tool results are flattened into a trailing
user message. We must NOT stamp that volatile trailing block — we
must stamp the original stable user prompt instead.
"""
llm = AnthropicCompletion(model="claude-sonnet-4-5")
messages = [
mark_cache_breakpoint({"role": "system", "content": "you are helpful"}),
mark_cache_breakpoint({"role": "user", "content": "stable task prompt"}),
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"id": "tc_1",
"function": {"name": "ping", "arguments": "{}"},
}
],
},
{"role": "tool", "tool_call_id": "tc_1", "content": "volatile tool result"},
]
formatted, _system = llm._format_messages_for_anthropic(messages)
# Find the message that holds the stable prompt
stable = next(
fm
for fm in formatted
if fm["role"] == "user"
and isinstance(fm["content"], list)
and any(
isinstance(b, dict)
and b.get("type") == "text"
and b.get("text") == "stable task prompt"
for b in fm["content"]
)
)
text_block = next(
b for b in stable["content"] if isinstance(b, dict) and b.get("type") == "text"
)
assert text_block.get("cache_control") == {"type": "ephemeral"}
# The tool_result-bearing user message must NOT be stamped
tool_carrier = next(
fm
for fm in formatted
if fm["role"] == "user"
and isinstance(fm["content"], list)
and any(
isinstance(b, dict) and b.get("type") == "tool_result"
for b in fm["content"]
)
)
for block in tool_carrier["content"]:
assert "cache_control" not in block
def test_assistant_marker_is_ignored(self) -> None:
"""Markers on assistant messages have no stable stamp target after
Anthropic's role coalescing, so they should be silently ignored
rather than collected and then dropped on a mismatch.
"""
llm = AnthropicCompletion(model="claude-sonnet-4-5")
messages = [
mark_cache_breakpoint({"role": "system", "content": "you are helpful"}),
mark_cache_breakpoint(
{"role": "assistant", "content": "I will help you out."}
),
{"role": "user", "content": "ping"},
]
formatted, system = llm._format_messages_for_anthropic(messages)
# System still cached
assert isinstance(system, list)
# No user message was marked → no user message should carry cache_control
for fm in formatted:
if fm.get("role") != "user":
continue
content = fm.get("content")
if isinstance(content, list):
for block in content:
if isinstance(block, dict):
assert "cache_control" not in block
def test_list_content_user_marker_matches(self) -> None:
"""A pre-formatted user message with a single text block should still
match against the post-format user message.
"""
llm = AnthropicCompletion(model="claude-sonnet-4-5")
messages = [
mark_cache_breakpoint(
{
"role": "user",
"content": [{"type": "text", "text": "stable list prompt"}],
}
),
]
formatted, _system = llm._format_messages_for_anthropic(messages)
user_msg = next(fm for fm in formatted if fm["role"] == "user")
content = user_msg["content"]
assert isinstance(content, list)
text_block = next(b for b in content if isinstance(b, dict) and b.get("type") == "text")
assert text_block.get("cache_control") == {"type": "ephemeral"}
def test_unmarked_messages_get_no_cache_control(self) -> None:
llm = AnthropicCompletion(model="claude-sonnet-4-5")
messages = [
{"role": "system", "content": "no caching here"},
{"role": "user", "content": "no caching here either"},
]
formatted, system = llm._format_messages_for_anthropic(messages)
# No marker → system stays a plain string (no content-block conversion)
assert isinstance(system, str)
# No marker → no cache_control anywhere in formatted messages
for fm in formatted:
content = fm.get("content")
if isinstance(content, list):
for block in content:
assert "cache_control" not in block
class TestNonAnthropicStripsMarker:
def test_openai_format_strips_marker_from_wire_payload(self) -> None:
llm = OpenAICompletion(model="gpt-4o-mini")
messages = [
mark_cache_breakpoint({"role": "system", "content": "stable"}),
mark_cache_breakpoint({"role": "user", "content": "hi"}),
]
formatted = llm._format_messages(messages)
for m in formatted:
assert CACHE_BREAKPOINT_KEY not in m
class TestLiteLLMPathStripsMarker:
"""Regression tests for issue #5886: cache_breakpoint must be stripped
on the LiteLLM code path (used by Groq, OpenAI-compatible, etc.)
which does NOT call BaseLLM._format_messages().
"""
def test_prepare_completion_params_strips_cache_breakpoint(self) -> None:
"""_prepare_completion_params must strip cache_breakpoint from the
messages payload so providers like Groq do not receive unsupported keys.
"""
from crewai.llm import LLM
llm = LLM(model="groq/llama-3.3-70b-versatile", is_litellm=True)
messages = [
mark_cache_breakpoint({"role": "system", "content": "You are a researcher"}),
mark_cache_breakpoint({"role": "user", "content": "Write a summary of AI trends"}),
]
params = llm._prepare_completion_params(messages)
for msg in params["messages"]:
assert CACHE_BREAKPOINT_KEY not in msg, (
f"cache_breakpoint leaked to LiteLLM params: {msg}"
)
def test_prepare_completion_params_preserves_original_markers(self) -> None:
"""Stripping must not mutate the caller's messages — executors reuse
their messages list across ReAct loop iterations.
"""
from crewai.llm import LLM
llm = LLM(model="groq/llama-3.3-70b-versatile", is_litellm=True)
messages = [
mark_cache_breakpoint({"role": "system", "content": "stable system"}),
mark_cache_breakpoint({"role": "user", "content": "stable user"}),
]
llm._prepare_completion_params(messages)
# Original messages must still carry the markers
assert messages[0][CACHE_BREAKPOINT_KEY] is True
assert messages[1][CACHE_BREAKPOINT_KEY] is True
def test_prepare_completion_params_without_markers(self) -> None:
"""Messages without cache_breakpoint must pass through unchanged."""
from crewai.llm import LLM
llm = LLM(model="groq/llama-3.3-70b-versatile", is_litellm=True)
messages = [
{"role": "system", "content": "You are helpful"},
{"role": "user", "content": "Hello"},
]
params = llm._prepare_completion_params(messages)
assert len(params["messages"]) == 2
assert params["messages"][0]["content"] == "You are helpful"
assert params["messages"][1]["content"] == "Hello"
for msg in params["messages"]:
assert CACHE_BREAKPOINT_KEY not in msg