diff --git a/lib/crewai/src/crewai/agent/core.py b/lib/crewai/src/crewai/agent/core.py index fc0d1b025..ecb9758b4 100644 --- a/lib/crewai/src/crewai/agent/core.py +++ b/lib/crewai/src/crewai/agent/core.py @@ -36,7 +36,6 @@ from typing_extensions import Self, TypeIs from crewai.agent.planning_config import PlanningConfig from crewai.agent.utils import ( ahandle_knowledge_retrieval, - append_skill_context, apply_training_data, build_task_prompt_with_schema, format_task_with_context, @@ -549,7 +548,6 @@ class Agent(BaseAgent): Returns: The fully prepared task prompt. """ - task_prompt = append_skill_context(self, task_prompt) prepare_tools(self, tools, task) return apply_training_data(self, task_prompt) @@ -1486,8 +1484,6 @@ class Agent(BaseAgent): ), ) - formatted_messages = append_skill_context(self, formatted_messages) - inputs: dict[str, Any] = { "input": formatted_messages, "tool_names": get_tool_names(parsed_tools), diff --git a/lib/crewai/src/crewai/agent/utils.py b/lib/crewai/src/crewai/agent/utils.py index 93c861835..47701c8e4 100644 --- a/lib/crewai/src/crewai/agent/utils.py +++ b/lib/crewai/src/crewai/agent/utils.py @@ -213,30 +213,6 @@ def _combine_knowledge_context(agent: Agent) -> str: return agent_ctx + separator + crew_ctx -def append_skill_context(agent: Agent, task_prompt: str) -> str: - """Append activated skill context sections to the task prompt. - - Args: - agent: The agent with optional skills. - task_prompt: The current task prompt. - - Returns: - The task prompt with skill context appended. - """ - if not agent.skills: - return task_prompt - - from crewai.skills.loader import format_skill_context - from crewai.skills.models import Skill - - skill_sections = [ - format_skill_context(s) for s in agent.skills if isinstance(s, Skill) - ] - if skill_sections: - task_prompt += "\n\n" + "\n\n".join(skill_sections) - return task_prompt - - def apply_training_data(agent: Agent, task_prompt: str) -> str: """Apply training data to the task prompt. diff --git a/lib/crewai/src/crewai/agents/crew_agent_executor.py b/lib/crewai/src/crewai/agents/crew_agent_executor.py index bd137a6fd..fce80ad7a 100644 --- a/lib/crewai/src/crewai/agents/crew_agent_executor.py +++ b/lib/crewai/src/crewai/agents/crew_agent_executor.py @@ -174,6 +174,8 @@ class CrewAgentExecutor(BaseAgentExecutor): if provider.setup_messages(cast(ExecutorContext, cast(object, self))): return + from crewai.llms.cache import mark_cache_breakpoint + if self.prompt is not None and "system" in self.prompt: system_prompt = self._format_prompt( cast(str, self.prompt.get("system", "")), inputs @@ -181,11 +183,22 @@ class CrewAgentExecutor(BaseAgentExecutor): user_prompt = self._format_prompt( cast(str, self.prompt.get("user", "")), inputs ) - self.messages.append(format_message_for_llm(system_prompt, role="system")) - self.messages.append(format_message_for_llm(user_prompt)) + # Cache breakpoints: end-of-system caches the per-agent stable + # prefix; end-of-user caches the per-task stable prefix across + # ReAct-loop iterations. + self.messages.append( + mark_cache_breakpoint( + format_message_for_llm(system_prompt, role="system") + ) + ) + self.messages.append( + mark_cache_breakpoint(format_message_for_llm(user_prompt)) + ) elif self.prompt is not None: user_prompt = self._format_prompt(self.prompt.get("prompt", ""), inputs) - self.messages.append(format_message_for_llm(user_prompt)) + self.messages.append( + mark_cache_breakpoint(format_message_for_llm(user_prompt)) + ) provider.post_setup_messages(cast(ExecutorContext, cast(object, self))) diff --git a/lib/crewai/src/crewai/experimental/agent_executor.py b/lib/crewai/src/crewai/experimental/agent_executor.py index 2fb721fc5..57e853666 100644 --- a/lib/crewai/src/crewai/experimental/agent_executor.py +++ b/lib/crewai/src/crewai/experimental/agent_executor.py @@ -2586,16 +2586,26 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): self._kickoff_input = inputs.get("input", "") if "system" in self.prompt: + from crewai.llms.cache import mark_cache_breakpoint + prompt = cast("SystemPromptResult", self.prompt) system_prompt = self._format_prompt(prompt["system"], inputs) user_prompt = self._format_prompt(prompt["user"], inputs) self.state.messages.append( - format_message_for_llm(system_prompt, role="system") + mark_cache_breakpoint( + format_message_for_llm(system_prompt, role="system") + ) + ) + self.state.messages.append( + mark_cache_breakpoint(format_message_for_llm(user_prompt)) ) - self.state.messages.append(format_message_for_llm(user_prompt)) else: + from crewai.llms.cache import mark_cache_breakpoint + user_prompt = self._format_prompt(self.prompt["prompt"], inputs) - self.state.messages.append(format_message_for_llm(user_prompt)) + self.state.messages.append( + mark_cache_breakpoint(format_message_for_llm(user_prompt)) + ) self._inject_files_from_inputs(inputs) @@ -2677,16 +2687,26 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): self._kickoff_input = inputs.get("input", "") if "system" in self.prompt: + from crewai.llms.cache import mark_cache_breakpoint + prompt = cast("SystemPromptResult", self.prompt) system_prompt = self._format_prompt(prompt["system"], inputs) user_prompt = self._format_prompt(prompt["user"], inputs) self.state.messages.append( - format_message_for_llm(system_prompt, role="system") + mark_cache_breakpoint( + format_message_for_llm(system_prompt, role="system") + ) + ) + self.state.messages.append( + mark_cache_breakpoint(format_message_for_llm(user_prompt)) ) - self.state.messages.append(format_message_for_llm(user_prompt)) else: + from crewai.llms.cache import mark_cache_breakpoint + user_prompt = self._format_prompt(self.prompt["prompt"], inputs) - self.state.messages.append(format_message_for_llm(user_prompt)) + self.state.messages.append( + mark_cache_breakpoint(format_message_for_llm(user_prompt)) + ) self._inject_files_from_inputs(inputs) diff --git a/lib/crewai/src/crewai/llms/base_llm.py b/lib/crewai/src/crewai/llms/base_llm.py index 86a3ba276..8c2993d3a 100644 --- a/lib/crewai/src/crewai/llms/base_llm.py +++ b/lib/crewai/src/crewai/llms/base_llm.py @@ -14,7 +14,7 @@ from datetime import datetime import json import logging import re -from typing import TYPE_CHECKING, Any, Final, Literal +from typing import TYPE_CHECKING, Any, Final, Literal, cast import uuid from pydantic import ( @@ -703,10 +703,19 @@ class BaseLLM(BaseModel, ABC): Raises: ValueError: If message format is invalid """ + from crewai.llms.cache import CACHE_BREAKPOINT_KEY + from crewai.utilities.types import LLMMessage as _LLMMessage + if isinstance(messages, str): return [{"role": "user", "content": messages}] - # Validate message format + # Validate then copy each message, dropping the cache-breakpoint + # flag in the copy only. The caller (e.g. CrewAgentExecutor, + # experimental.AgentExecutor) reuses its messages buffer across + # many LLM calls in the tool-use loop; mutating their dicts + # in place would erase the markers after the first call and + # break prompt caching for every subsequent iteration. + cleaned: list[LLMMessage] = [] for i, msg in enumerate(messages): if not isinstance(msg, dict): raise ValueError(f"Message at index {i} must be a dictionary") @@ -714,8 +723,12 @@ class BaseLLM(BaseModel, ABC): raise ValueError( f"Message at index {i} must have 'role' and 'content' keys" ) + copy: dict[str, Any] = { + k: v for k, v in msg.items() if k != CACHE_BREAKPOINT_KEY + } + cleaned.append(cast(_LLMMessage, copy)) - return self._process_message_files(messages) + return self._process_message_files(cleaned) def _process_message_files(self, messages: list[LLMMessage]) -> list[LLMMessage]: """Process files attached to messages and format for the provider. diff --git a/lib/crewai/src/crewai/llms/cache.py b/lib/crewai/src/crewai/llms/cache.py new file mode 100644 index 000000000..011541d3a --- /dev/null +++ b/lib/crewai/src/crewai/llms/cache.py @@ -0,0 +1,37 @@ +"""Provider-agnostic prompt-cache breakpoint marker. + +Application code (prompt builders, agent executors) marks messages where a +stable prefix ends. Provider adapters then translate the marker into the +cache directive their API expects, or strip it for providers that cache +implicitly (OpenAI, Gemini) or do not cache at all. + +Usage: + + from crewai.llms.cache import mark_cache_breakpoint + + messages = [ + mark_cache_breakpoint({"role": "system", "content": stable_system}), + mark_cache_breakpoint({"role": "user", "content": stable_user_prefix}), + {"role": "user", "content": volatile_query}, + ] +""" + +from __future__ import annotations + +from typing import Any + + +CACHE_BREAKPOINT_KEY = "cache_breakpoint" + + +def mark_cache_breakpoint(message: dict[str, Any]) -> dict[str, Any]: + """Return ``message`` with the cache-breakpoint flag set. + + Returns a new dict so callers can safely pass literal dicts. + """ + return {**message, CACHE_BREAKPOINT_KEY: True} + + +def strip_cache_breakpoint(message: dict[str, Any]) -> None: + """Remove the breakpoint flag from a message in place.""" + message.pop(CACHE_BREAKPOINT_KEY, None) diff --git a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py index b627a8539..5eeeefb8c 100644 --- a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py +++ b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py @@ -425,7 +425,7 @@ class AnthropicCompletion(BaseLLM): def _prepare_completion_params( self, messages: list[LLMMessage], - system_message: str | None = None, + system_message: str | list[dict[str, Any]] | None = None, tools: list[dict[str, Any]] | None = None, available_functions: dict[str, Any] | None = None, ) -> dict[str, Any]: @@ -665,7 +665,7 @@ class AnthropicCompletion(BaseLLM): def _format_messages_for_anthropic( self, messages: str | list[LLMMessage] - ) -> tuple[list[LLMMessage], str | None]: + ) -> tuple[list[LLMMessage], str | list[dict[str, Any]] | None]: """Format messages for Anthropic API. Anthropic has specific requirements: @@ -679,8 +679,51 @@ class AnthropicCompletion(BaseLLM): messages: Input messages Returns: - Tuple of (formatted_messages, system_message) + Tuple of (formatted_messages, system_message). `system_message` is + a list of content blocks (with cache_control stamped) when any + system message in the input carried a cache_breakpoint flag; + otherwise a plain string for backwards compatibility. """ + from crewai.llms.cache import CACHE_BREAKPOINT_KEY + + # Read cache_breakpoint flags from raw input BEFORE super strips them. + # We track the CONTENT of marked user/assistant messages so we can + # locate the corresponding block in formatted_messages — Anthropic + # rewrites tool results into user messages, so positional indices + # do not survive the conversion. We must stamp the original stable + # message (typically the initial task prompt), not whatever happens + # to be the trailing user-role block after tool_result expansion. + cache_system = False + cache_match_contents: list[str] = [] + if not isinstance(messages, str): + for m in messages: + if not (isinstance(m, dict) and m.get(CACHE_BREAKPOINT_KEY)): + continue + role = m.get("role") + if role == "system": + cache_system = True + continue + if role != "user": + # Only user messages survive Anthropic's role-coalescing + # in a stable, addressable position. Markers on assistant + # or tool messages have no reliable stamp target after + # tool_result expansion, so we ignore them. + continue + raw_content = m.get("content") + if isinstance(raw_content, str) and raw_content: + cache_match_contents.append(raw_content) + continue + if isinstance(raw_content, list): + # Pull text from a single-text-block list so callers that + # pre-format content blocks still match cleanly. + text_blocks = [ + b.get("text") + for b in raw_content + if isinstance(b, dict) and b.get("type") == "text" + ] + if len(text_blocks) == 1 and isinstance(text_blocks[0], str): + cache_match_contents.append(text_blocks[0]) + # Use base class formatting first base_formatted = super()._format_messages(messages) @@ -788,7 +831,62 @@ class AnthropicCompletion(BaseLLM): # If first message is not from user, insert a user message at the beginning formatted_messages.insert(0, {"role": "user", "content": "Hello"}) - return formatted_messages, system_message + # Stamp cache_control on the message(s) whose original content was + # marked. We scan formatted_messages in order and stamp the first + # match per marked content — Anthropic permits up to 4 cache + # breakpoints per request, which is more than enough for our usage. + # Matching by content (rather than position) handles the ReAct + # case where tool_result blocks get expanded into trailing user + # messages: the stable initial-task prompt still maps cleanly. + for needle in cache_match_contents: + for fm in formatted_messages: + if fm.get("role") != "user": + continue + content = fm.get("content") + if isinstance(content, str) and content == needle: + self._stamp_cache_control_on_message(fm) + break + if isinstance(content, list): + fm_texts: list[str] = [ + b.get("text", "") + for b in content + if isinstance(b, dict) and b.get("type") == "text" + ] + if len(fm_texts) == 1 and fm_texts[0] == needle: + self._stamp_cache_control_on_message(fm) + break + + # Convert system to content-block form when caching is requested. + system_payload: str | list[dict[str, Any]] | None = system_message + if system_message and cache_system: + system_payload = [ + { + "type": "text", + "text": system_message, + "cache_control": {"type": "ephemeral"}, + } + ] + + return formatted_messages, system_payload + + @staticmethod + def _stamp_cache_control_on_message(message: LLMMessage) -> None: + """Stamp cache_control on the last content block of an Anthropic message.""" + msg = cast(dict[str, Any], message) + content = msg.get("content") + if isinstance(content, str): + msg["content"] = [ + { + "type": "text", + "text": content, + "cache_control": {"type": "ephemeral"}, + } + ] + return + if isinstance(content, list) and content: + last = content[-1] + if isinstance(last, dict): + last["cache_control"] = {"type": "ephemeral"} def _handle_completion( self, diff --git a/lib/crewai/src/crewai/skills/loader.py b/lib/crewai/src/crewai/skills/loader.py index 78e244f90..01659ae6e 100644 --- a/lib/crewai/src/crewai/skills/loader.py +++ b/lib/crewai/src/crewai/skills/loader.py @@ -161,6 +161,9 @@ def format_skill_context(skill: Skill) -> str: At METADATA level: returns name and description only. At INSTRUCTIONS level or above: returns full SKILL.md body. + Output is wrapped in XML tags so the block can serve + as a stable cache anchor when injected into the system prompt. + Args: skill: The skill to format. @@ -169,7 +172,7 @@ def format_skill_context(skill: Skill) -> str: """ if skill.disclosure_level >= INSTRUCTIONS and skill.instructions: parts = [ - f"## Skill: {skill.name}", + f'', skill.description, "", skill.instructions, @@ -180,5 +183,6 @@ def format_skill_context(skill: Skill) -> str: for dir_name, files in sorted(skill.resource_files.items()): if files: parts.append(f"- **{dir_name}/**: {', '.join(files)}") + parts.append("") return "\n".join(parts) - return f"## Skill: {skill.name}\n{skill.description}" + return f'\n{skill.description}\n' diff --git a/lib/crewai/src/crewai/utilities/prompts.py b/lib/crewai/src/crewai/utilities/prompts.py index 31c1a1b27..db89b9c16 100644 --- a/lib/crewai/src/crewai/utilities/prompts.py +++ b/lib/crewai/src/crewai/utilities/prompts.py @@ -86,7 +86,7 @@ class Prompts(BaseModel): slices.append("tools") else: slices.append("no_tools") - system: str = self._build_prompt(slices) + system: str = self._build_prompt(slices) + self._build_skill_block() # Determine which task slice to use: task_slice: COMPONENTS @@ -106,7 +106,7 @@ class Prompts(BaseModel): return SystemPromptResult( system=system, user=self._build_prompt([task_slice]), - prompt=self._build_prompt(slices), + prompt=self._build_prompt(slices) + self._build_skill_block(), ) return StandardPromptResult( prompt=self._build_prompt( @@ -115,8 +115,27 @@ class Prompts(BaseModel): self.prompt_template, self.response_template, ) + + self._build_skill_block() ) + def _build_skill_block(self) -> str: + """Render the agent's activated skills as a stable XML block. + + Skills are agent-scoped (do not change per task), so they live in the + system prompt where prompt-cache prefixes can survive across calls. + """ + skills = getattr(self.agent, "skills", None) + if not skills: + return "" + + from crewai.skills.loader import format_skill_context + from crewai.skills.models import Skill + + sections = [format_skill_context(s) for s in skills if isinstance(s, Skill)] + if not sections: + return "" + return "\n\n\n" + "\n\n".join(sections) + "\n" + def _build_prompt( self, components: list[COMPONENTS], diff --git a/lib/crewai/tests/llms/test_prompt_cache.py b/lib/crewai/tests/llms/test_prompt_cache.py new file mode 100644 index 000000000..c421c331e --- /dev/null +++ b/lib/crewai/tests/llms/test_prompt_cache.py @@ -0,0 +1,196 @@ +"""Regression tests for the provider-agnostic prompt-cache breakpoint flag.""" + +from __future__ import annotations + +from crewai.llms.cache import ( + CACHE_BREAKPOINT_KEY, + mark_cache_breakpoint, + strip_cache_breakpoint, +) +from crewai.llms.providers.anthropic.completion import AnthropicCompletion +from crewai.llms.providers.openai.completion import OpenAICompletion + + +class TestCacheMarkerHelpers: + def test_mark_returns_new_dict(self) -> None: + original = {"role": "user", "content": "hi"} + marked = mark_cache_breakpoint(original) + assert marked[CACHE_BREAKPOINT_KEY] is True + # Marker must NOT bleed back into the caller's dict — callers may + # pass literal dicts and reuse them across calls. + assert CACHE_BREAKPOINT_KEY not in original + + def test_strip_is_idempotent(self) -> None: + msg = {"role": "user", "content": "hi", CACHE_BREAKPOINT_KEY: True} + strip_cache_breakpoint(msg) + assert CACHE_BREAKPOINT_KEY not in msg + strip_cache_breakpoint(msg) + assert CACHE_BREAKPOINT_KEY not in msg + + +class TestBaseFormatDoesNotMutate: + """The strip-on-format pass must not erase markers from the caller's + messages list — executors reuse a single list across many LLM calls, + and mutating it would defeat caching on every iteration after the first. + """ + + def test_repeated_format_preserves_markers(self) -> None: + llm = OpenAICompletion(model="gpt-4o-mini") + messages = [ + mark_cache_breakpoint({"role": "system", "content": "stable system"}), + mark_cache_breakpoint({"role": "user", "content": "stable user"}), + ] + # First call: provider strips markers from the returned (copied) list + first = llm._format_messages(messages) + assert all(CACHE_BREAKPOINT_KEY not in m for m in first) + # Original list must STILL carry the markers + assert messages[0][CACHE_BREAKPOINT_KEY] is True + assert messages[1][CACHE_BREAKPOINT_KEY] is True + # Second call from the same list still sees the markers + second = llm._format_messages(messages) + assert all(CACHE_BREAKPOINT_KEY not in m for m in second) + assert messages[0][CACHE_BREAKPOINT_KEY] is True + assert messages[1][CACHE_BREAKPOINT_KEY] is True + + +class TestAnthropicCacheStamping: + def test_stamps_system_with_cache_control(self) -> None: + llm = AnthropicCompletion(model="claude-sonnet-4-5") + messages = [ + mark_cache_breakpoint({"role": "system", "content": "you are helpful"}), + mark_cache_breakpoint({"role": "user", "content": "ping"}), + ] + formatted, system = llm._format_messages_for_anthropic(messages) + assert isinstance(system, list) + assert system[0]["cache_control"] == {"type": "ephemeral"} + assert system[0]["text"] == "you are helpful" + # First user block carries cache_control too + last_block = formatted[0]["content"][-1] + assert last_block["cache_control"] == {"type": "ephemeral"} + + def test_stamps_stable_user_not_tool_result(self) -> None: + """Within a ReAct loop, tool results are flattened into a trailing + user message. We must NOT stamp that volatile trailing block — we + must stamp the original stable user prompt instead. + """ + llm = AnthropicCompletion(model="claude-sonnet-4-5") + messages = [ + mark_cache_breakpoint({"role": "system", "content": "you are helpful"}), + mark_cache_breakpoint({"role": "user", "content": "stable task prompt"}), + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "tc_1", + "function": {"name": "ping", "arguments": "{}"}, + } + ], + }, + {"role": "tool", "tool_call_id": "tc_1", "content": "volatile tool result"}, + ] + formatted, _system = llm._format_messages_for_anthropic(messages) + # Find the message that holds the stable prompt + stable = next( + fm + for fm in formatted + if fm["role"] == "user" + and isinstance(fm["content"], list) + and any( + isinstance(b, dict) + and b.get("type") == "text" + and b.get("text") == "stable task prompt" + for b in fm["content"] + ) + ) + text_block = next( + b for b in stable["content"] if isinstance(b, dict) and b.get("type") == "text" + ) + assert text_block.get("cache_control") == {"type": "ephemeral"} + # The tool_result-bearing user message must NOT be stamped + tool_carrier = next( + fm + for fm in formatted + if fm["role"] == "user" + and isinstance(fm["content"], list) + and any( + isinstance(b, dict) and b.get("type") == "tool_result" + for b in fm["content"] + ) + ) + for block in tool_carrier["content"]: + assert "cache_control" not in block + + def test_assistant_marker_is_ignored(self) -> None: + """Markers on assistant messages have no stable stamp target after + Anthropic's role coalescing, so they should be silently ignored + rather than collected and then dropped on a mismatch. + """ + llm = AnthropicCompletion(model="claude-sonnet-4-5") + messages = [ + mark_cache_breakpoint({"role": "system", "content": "you are helpful"}), + mark_cache_breakpoint( + {"role": "assistant", "content": "I will help you out."} + ), + {"role": "user", "content": "ping"}, + ] + formatted, system = llm._format_messages_for_anthropic(messages) + # System still cached + assert isinstance(system, list) + # No user message was marked → no user message should carry cache_control + for fm in formatted: + if fm.get("role") != "user": + continue + content = fm.get("content") + if isinstance(content, list): + for block in content: + if isinstance(block, dict): + assert "cache_control" not in block + + def test_list_content_user_marker_matches(self) -> None: + """A pre-formatted user message with a single text block should still + match against the post-format user message. + """ + llm = AnthropicCompletion(model="claude-sonnet-4-5") + messages = [ + mark_cache_breakpoint( + { + "role": "user", + "content": [{"type": "text", "text": "stable list prompt"}], + } + ), + ] + formatted, _system = llm._format_messages_for_anthropic(messages) + user_msg = next(fm for fm in formatted if fm["role"] == "user") + content = user_msg["content"] + assert isinstance(content, list) + text_block = next(b for b in content if isinstance(b, dict) and b.get("type") == "text") + assert text_block.get("cache_control") == {"type": "ephemeral"} + + def test_unmarked_messages_get_no_cache_control(self) -> None: + llm = AnthropicCompletion(model="claude-sonnet-4-5") + messages = [ + {"role": "system", "content": "no caching here"}, + {"role": "user", "content": "no caching here either"}, + ] + formatted, system = llm._format_messages_for_anthropic(messages) + # No marker → system stays a plain string (no content-block conversion) + assert isinstance(system, str) + # No marker → no cache_control anywhere in formatted messages + for fm in formatted: + content = fm.get("content") + if isinstance(content, list): + for block in content: + assert "cache_control" not in block + + +class TestNonAnthropicStripsMarker: + def test_openai_format_strips_marker_from_wire_payload(self) -> None: + llm = OpenAICompletion(model="gpt-4o-mini") + messages = [ + mark_cache_breakpoint({"role": "system", "content": "stable"}), + mark_cache_breakpoint({"role": "user", "content": "hi"}), + ] + formatted = llm._format_messages(messages) + for m in formatted: + assert CACHE_BREAKPOINT_KEY not in m diff --git a/lib/crewai/tests/skills/test_integration.py b/lib/crewai/tests/skills/test_integration.py index c13054e31..f3e572cf9 100644 --- a/lib/crewai/tests/skills/test_integration.py +++ b/lib/crewai/tests/skills/test_integration.py @@ -5,9 +5,9 @@ from pathlib import Path import pytest from crewai import Agent -from crewai.agent.utils import append_skill_context from crewai.skills.loader import activate_skill, discover_skills, format_skill_context from crewai.skills.models import INSTRUCTIONS, METADATA +from crewai.utilities.prompts import Prompts def _create_skill_dir(parent: Path, name: str, body: str = "Body.") -> Path: @@ -34,7 +34,7 @@ class TestSkillDiscoveryAndActivation: assert activated.instructions == "Use this skill." context = format_skill_context(activated) - assert "## Skill: my-skill" in context + assert '' in context assert "Use this skill." in context def test_filter_by_skill_names(self, tmp_path: Path) -> None: @@ -94,7 +94,9 @@ class TestSkillDiscoveryAndActivation: assert agent.skills[0].disclosure_level == METADATA assert agent.skills[0].instructions is None - prompt = append_skill_context(agent, "Plan a 10-day Japan itinerary.") - assert "## Skill: travel" in prompt - assert "Skill travel" in prompt - assert "Use this skill for travel planning." not in prompt + result = Prompts(agent=agent, has_tools=False, use_system_prompt=True).task_execution() + system = getattr(result, "system", "") or result.prompt + assert '' in system + assert "Skill travel" in system + # METADATA-level skills must not leak full instructions into the prompt + assert "Use this skill for travel planning." not in system diff --git a/lib/crewai/tests/skills/test_loader.py b/lib/crewai/tests/skills/test_loader.py index 8303e19df..dcbd29f9d 100644 --- a/lib/crewai/tests/skills/test_loader.py +++ b/lib/crewai/tests/skills/test_loader.py @@ -105,7 +105,7 @@ class TestFormatSkillContext: frontmatter=fm, path=tmp_path, disclosure_level=METADATA ) ctx = format_skill_context(skill) - assert "## Skill: test-skill" in ctx + assert '' in ctx assert "A skill" in ctx def test_instructions_level(self, tmp_path: Path) -> None: @@ -117,7 +117,7 @@ class TestFormatSkillContext: instructions="Do these things.", ) ctx = format_skill_context(skill) - assert "## Skill: test-skill" in ctx + assert '' in ctx assert "Do these things." in ctx def test_no_instructions_at_instructions_level(self, tmp_path: Path) -> None: @@ -129,7 +129,7 @@ class TestFormatSkillContext: instructions=None, ) ctx = format_skill_context(skill) - assert ctx == "## Skill: test-skill\nA skill" + assert ctx == '\nA skill\n' def test_resources_level(self, tmp_path: Path) -> None: fm = SkillFrontmatter(name="test-skill", description="A skill")