From b2a2e667bf82ea64f0af4f0996c0fc6e904c24e5 Mon Sep 17 00:00:00 2001 From: Greyson Lalonde Date: Thu, 5 Mar 2026 19:30:20 -0500 Subject: [PATCH] feat: add skills models, parser, and validation --- lib/crewai/pyproject.toml | 1 + lib/crewai/src/crewai/skills/__init__.py | 26 ++++ lib/crewai/src/crewai/skills/models.py | 137 ++++++++++++++++++ lib/crewai/src/crewai/skills/parser.py | 160 +++++++++++++++++++++ lib/crewai/src/crewai/skills/validation.py | 67 +++++++++ uv.lock | 2 + 6 files changed, 393 insertions(+) create mode 100644 lib/crewai/src/crewai/skills/__init__.py create mode 100644 lib/crewai/src/crewai/skills/models.py create mode 100644 lib/crewai/src/crewai/skills/parser.py create mode 100644 lib/crewai/src/crewai/skills/validation.py diff --git a/lib/crewai/pyproject.toml b/lib/crewai/pyproject.toml index da8c851df..7a295251a 100644 --- a/lib/crewai/pyproject.toml +++ b/lib/crewai/pyproject.toml @@ -39,6 +39,7 @@ dependencies = [ "mcp~=1.26.0", "uv~=0.9.13", "aiosqlite~=0.21.0", + "pyyaml~=6.0", ] [project.urls] diff --git a/lib/crewai/src/crewai/skills/__init__.py b/lib/crewai/src/crewai/skills/__init__.py new file mode 100644 index 000000000..538fb4348 --- /dev/null +++ b/lib/crewai/src/crewai/skills/__init__.py @@ -0,0 +1,26 @@ +"""Agent Skills standard implementation for crewAI. + +Provides filesystem-based skill packaging with progressive disclosure. 
class DisclosureLevel(IntEnum):
    """Ordered stages of progressive skill loading.

    Being an ``IntEnum``, levels can be compared with ``<`` / ``>=``.

    Attributes:
        METADATA: Only frontmatter metadata is loaded (name, description).
        INSTRUCTIONS: Full SKILL.md body is loaded.
        RESOURCES: Resource directories (scripts, references, assets) are cataloged.
    """

    METADATA = 1
    INSTRUCTIONS = 2
    RESOURCES = 3


class SkillFrontmatter(BaseModel, frozen=True, populate_by_name=True):
    """Immutable model of the YAML frontmatter of a SKILL.md file.

    Attributes:
        name: Unique skill identifier, checked by ``validate_skill_name``.
        description: Human-readable description of the skill.
        license: Optional license identifier.
        compatibility: Optional compatibility information.
        metadata: Optional additional metadata as string key-value pairs.
        allowed_tools: Optional list of tools the skill may use. Read from the
            ``allowed-tools`` frontmatter key (the spelling used by the Agent
            Skills format); the Python name ``allowed_tools`` is accepted too.
    """

    name: str
    description: str
    license: str | None = None
    compatibility: str | None = None
    metadata: dict[str, str] | None = None
    # Without the alias the hyphenated frontmatter key would be treated as an
    # unknown extra field and silently dropped.
    allowed_tools: list[str] | None = Field(default=None, alias="allowed-tools")

    @field_validator("name")
    @classmethod
    def check_name(cls, v: str) -> str:
        """Validate skill name against spec constraints."""
        return validate_skill_name(v)

    @field_validator("allowed_tools", mode="before")
    @classmethod
    def split_allowed_tools(cls, v: object) -> object:
        """Accept a space-delimited string as well as a YAML list."""
        if isinstance(v, str):
            return v.split()
        return v


class Skill(BaseModel):
    """A skill directory loaded up to some disclosure level.

    Attributes:
        frontmatter: Parsed YAML frontmatter.
        instructions: SKILL.md body text, or ``None`` if not loaded yet.
        path: Filesystem path to the skill directory.
        disclosure_level: Current disclosure level (defaults to METADATA).
        resource_files: Mapping of resource directory name to the files found
            in it, or ``None`` if not cataloged yet.
    """

    frontmatter: SkillFrontmatter
    instructions: str | None = None
    path: Path
    disclosure_level: DisclosureLevel = Field(default=DisclosureLevel.METADATA)
    resource_files: dict[str, list[str]] | None = None

    @property
    def name(self) -> str:
        """Skill name from frontmatter."""
        return self.frontmatter.name

    @property
    def description(self) -> str:
        """Skill description from frontmatter."""
        return self.frontmatter.description

    @property
    def scripts_dir(self) -> Path:
        """Path to the ``scripts`` subdirectory (may not exist)."""
        return self.path / "scripts"

    @property
    def references_dir(self) -> Path:
        """Path to the ``references`` subdirectory (may not exist)."""
        return self.path / "references"

    @property
    def assets_dir(self) -> Path:
        """Path to the ``assets`` subdirectory (may not exist)."""
        return self.path / "assets"

    def has_scripts(self) -> bool:
        """Return True if the scripts directory exists on disk."""
        return self.scripts_dir.is_dir()

    def has_references(self) -> bool:
        """Return True if the references directory exists on disk."""
        return self.references_dir.is_dir()

    def has_assets(self) -> bool:
        """Return True if the assets directory exists on disk."""
        return self.assets_dir.is_dir()

    def with_disclosure_level(
        self,
        level: DisclosureLevel,
        instructions: str | None = None,
        resource_files: dict[str, list[str]] | None = None,
    ) -> Skill:
        """Create a new Skill at a different disclosure level.

        The current instance is not modified. Arguments left as ``None`` fall
        back to the values already stored on this instance.

        Args:
            level: The new disclosure level.
            instructions: Optional instructions body text.
            resource_files: Optional cataloged resource files.

        Returns:
            A new Skill instance at the specified disclosure level.
        """
        new_instructions = (
            self.instructions if instructions is None else instructions
        )
        new_resources = (
            self.resource_files if resource_files is None else resource_files
        )
        return Skill(
            frontmatter=self.frontmatter,
            instructions=new_instructions,
            path=self.path,
            disclosure_level=level,
            resource_files=new_resources,
        )
SKILL_FILENAME: str = "SKILL.md"

# Marker line that opens and closes the YAML frontmatter block.
_FRONTMATTER_DELIMITER: str = "---"


class SkillParseError(ValueError):
    """Error raised when SKILL.md parsing fails."""


def parse_frontmatter(content: str) -> tuple[dict[str, Any], str]:
    """Split SKILL.md content into frontmatter dict and body text.

    The frontmatter is the text between a first line consisting of ``---``
    and the next line consisting of ``---``. Delimiters are matched as whole
    lines, so a ``---`` appearing inside a YAML value does not end the block.

    Args:
        content: Raw SKILL.md file content.

    Returns:
        Tuple of (frontmatter dict, body text). The body is stripped of
        leading and trailing whitespace.

    Raises:
        SkillParseError: If frontmatter delimiters are missing, the YAML is
            invalid, or the YAML document is not a mapping.
    """
    # A UTF-8 byte-order mark would otherwise hide the opening delimiter.
    lines = content.removeprefix("\ufeff").splitlines(keepends=True)

    if not lines or lines[0].rstrip() != _FRONTMATTER_DELIMITER:
        msg = "SKILL.md must start with '---' frontmatter delimiter"
        raise SkillParseError(msg)

    closing_idx = next(
        (
            idx
            for idx, line in enumerate(lines[1:], start=1)
            if line.rstrip() == _FRONTMATTER_DELIMITER
        ),
        None,
    )
    if closing_idx is None:
        msg = "SKILL.md missing closing '---' frontmatter delimiter"
        raise SkillParseError(msg)

    yaml_content = "".join(lines[1:closing_idx])
    body = "".join(lines[closing_idx + 1 :]).strip()

    try:
        frontmatter = yaml.safe_load(yaml_content)
    except yaml.YAMLError as e:
        msg = f"Invalid YAML in frontmatter: {e}"
        raise SkillParseError(msg) from e

    # Empty frontmatter loads as None; scalars and lists are rejected as well.
    if not isinstance(frontmatter, dict):
        msg = "Frontmatter must be a YAML mapping"
        raise SkillParseError(msg)

    return frontmatter, body


def parse_skill_md(path: Path) -> tuple[SkillFrontmatter, str]:
    """Read and parse a SKILL.md file.

    Args:
        path: Path to the SKILL.md file.

    Returns:
        Tuple of (SkillFrontmatter, body text).

    Raises:
        FileNotFoundError: If the file does not exist.
        SkillParseError: If the frontmatter cannot be split, is not valid
            YAML, or does not satisfy the SkillFrontmatter model.
    """
    content = path.read_text(encoding="utf-8")
    frontmatter_dict, body = parse_frontmatter(content)
    try:
        frontmatter = SkillFrontmatter.model_validate(frontmatter_dict)
    except ValueError as e:
        # Model validation errors are ValueError subclasses; re-raise them as
        # the documented parse error while keeping the original as the cause.
        msg = f"Invalid frontmatter in {path}: {e}"
        raise SkillParseError(msg) from e
    return frontmatter, body


def load_skill_metadata(skill_dir: Path) -> Skill:
    """Load a skill at METADATA disclosure level.

    Parses ``SKILL.md`` inside ``skill_dir``, discards the body, and checks
    that the directory name matches the declared skill name.

    Args:
        skill_dir: Path to the skill directory.

    Returns:
        Skill instance at METADATA level.

    Raises:
        FileNotFoundError: If SKILL.md is missing.
        SkillParseError: If parsing fails.
        ValueError: If directory name doesn't match skill name.
    """
    frontmatter, _body = parse_skill_md(skill_dir / SKILL_FILENAME)
    validate_directory_name(skill_dir, frontmatter.name)
    return Skill(
        frontmatter=frontmatter,
        path=skill_dir,
        disclosure_level=DisclosureLevel.METADATA,
    )


def load_skill_instructions(skill: Skill) -> Skill:
    """Promote a skill to INSTRUCTIONS disclosure level.

    Re-reads SKILL.md from ``skill.path`` to obtain the body text. A skill
    that is already at INSTRUCTIONS level or higher is returned unchanged.

    Args:
        skill: Skill at METADATA level.

    Returns:
        New Skill instance at INSTRUCTIONS level, or ``skill`` itself if no
        promotion was needed.
    """
    if skill.disclosure_level >= DisclosureLevel.INSTRUCTIONS:
        return skill

    _, body = parse_skill_md(skill.path / SKILL_FILENAME)
    return skill.with_disclosure_level(
        level=DisclosureLevel.INSTRUCTIONS,
        instructions=body,
    )


def load_skill_resources(skill: Skill) -> Skill:
    """Promote a skill to RESOURCES disclosure level.

    Loads instructions first if needed, then recursively lists the files
    under each existing ``scripts``, ``references`` and ``assets`` directory.
    Paths are recorded relative to their resource directory, sorted.

    Args:
        skill: Skill at any level.

    Returns:
        New Skill instance at RESOURCES level, or ``skill`` itself if it is
        already at that level.
    """
    if skill.disclosure_level >= DisclosureLevel.RESOURCES:
        return skill

    if skill.disclosure_level < DisclosureLevel.INSTRUCTIONS:
        skill = load_skill_instructions(skill)

    resource_files: dict[str, list[str]] = {}
    for dir_name in ("scripts", "references", "assets"):
        resource_dir = skill.path / dir_name
        if not resource_dir.is_dir():
            # Missing directories are simply left out of the catalog.
            continue
        resource_files[dir_name] = sorted(
            str(entry.relative_to(resource_dir))
            for entry in resource_dir.rglob("*")
            if entry.is_file()
        )

    return skill.with_disclosure_level(
        level=DisclosureLevel.RESOURCES,
        instructions=skill.instructions,
        resource_files=resource_files,
    )
MAX_SKILL_NAME_LENGTH: int = 64
MIN_SKILL_NAME_LENGTH: int = 1

# Lowercase alphanumeric groups joined by single hyphens.
_SKILL_NAME_PATTERN: re.Pattern[str] = re.compile(r"^[a-z0-9]+(?:-[a-z0-9]+)*$")


def validate_skill_name(name: str) -> str:
    """Validate a skill name against the Agent Skills specification.

    Names must be 1-64 characters, lowercase alphanumeric with hyphens,
    no leading/trailing hyphens, and no consecutive hyphens.

    Args:
        name: The skill name to validate.

    Returns:
        The validated skill name, unchanged.

    Raises:
        ValueError: If the name violates any constraint.
    """
    if len(name) < MIN_SKILL_NAME_LENGTH:
        msg = "Skill name must not be empty"
        raise ValueError(msg)

    if len(name) > MAX_SKILL_NAME_LENGTH:
        msg = (
            f"Skill name must be at most {MAX_SKILL_NAME_LENGTH} characters, "
            f"got {len(name)}"
        )
        raise ValueError(msg)

    # fullmatch rather than match: with match(), the trailing "$" in the
    # pattern also matches just before a final newline, so "name\n" passed.
    if _SKILL_NAME_PATTERN.fullmatch(name) is None:
        msg = (
            f"Invalid skill name '{name}'. Names must be lowercase alphanumeric "
            f"with single hyphens, no leading/trailing hyphens."
        )
        raise ValueError(msg)

    return name


def validate_directory_name(skill_dir: Path, skill_name: str) -> None:
    """Validate that a directory name matches the skill name.

    Args:
        skill_dir: Path to the skill directory.
        skill_name: The declared skill name from frontmatter.

    Raises:
        ValueError: If the directory name does not match the skill name.
    """
    # Path(".").name is "", so resolve such paths to get the real final
    # component; ordinary paths are compared without touching the filesystem.
    dir_name = skill_dir.name or skill_dir.resolve().name
    if dir_name != skill_name:
        msg = f"Directory name '{dir_name}' does not match skill name '{skill_name}'"
        raise ValueError(msg)