feat: add skills models, parser, and validation

2026-04-30 14:52:36 +00:00 · 2026-03-05 19:30:20 -05:00
parent f7e3b4dbe0
commit b2a2e667bf
6 changed files with 393 additions and 0 deletions
--- a/lib/crewai/pyproject.toml
+++ b/lib/crewai/pyproject.toml
@@ -39,6 +39,7 @@ dependencies = [
    "mcp~=1.26.0",
    "uv~=0.9.13",
    "aiosqlite~=0.21.0",
+    "pyyaml~=6.0",
 ]

 [project.urls]
--- a/lib/crewai/src/crewai/skills/__init__.py
+++ b/lib/crewai/src/crewai/skills/__init__.py
@@ -0,0 +1,26 @@
+"""Agent Skills standard implementation for crewAI.
+
+Provides filesystem-based skill packaging with progressive disclosure.
+"""
+
+from crewai.skills.loader import (
+    activate_skill,
+    discover_skills,
+    format_skill_context,
+    load_resources,
+)
+from crewai.skills.models import DisclosureLevel, Skill, SkillFrontmatter
+from crewai.skills.parser import SkillParseError, parse_skill_md
+
+
+# Public interface of the package: exactly the names imported above,
+# re-exported so callers can import them from ``crewai.skills`` directly.
+__all__ = [
+    "DisclosureLevel",
+    "Skill",
+    "SkillFrontmatter",
+    "SkillParseError",
+    "activate_skill",
+    "discover_skills",
+    "format_skill_context",
+    "load_resources",
+    "parse_skill_md",
+]
--- a/lib/crewai/src/crewai/skills/models.py
+++ b/lib/crewai/src/crewai/skills/models.py
@@ -0,0 +1,137 @@
+"""Pydantic data models for the Agent Skills standard.
+
+Defines DisclosureLevel, SkillFrontmatter, and Skill models for
+progressive disclosure of skill information.
+"""
+
+from __future__ import annotations
+
+from enum import IntEnum
+from pathlib import Path
+
+from pydantic import BaseModel, Field, field_validator
+
+from crewai.skills.validation import validate_skill_name
+
+
+class DisclosureLevel(IntEnum):
+    """Progressive disclosure levels for skill loading.
+
+    The members are integers on purpose: the loader functions in
+    ``crewai.skills.parser`` compare levels with ``>=`` and ``<`` to decide
+    whether a skill still has to be promoted, so the numeric order below
+    (METADATA < INSTRUCTIONS < RESOURCES) is part of the contract.
+
+    Attributes:
+        METADATA: Only frontmatter metadata is loaded (name, description).
+        INSTRUCTIONS: Full SKILL.md body is loaded.
+        RESOURCES: Resource directories (scripts, references, assets) are cataloged.
+    """
+
+    METADATA = 1
+    INSTRUCTIONS = 2
+    RESOURCES = 3
+
+
+class SkillFrontmatter(BaseModel, frozen=True, populate_by_name=True):
+    """YAML frontmatter from a SKILL.md file.
+
+    Instances are immutable (``frozen=True``). Unknown frontmatter keys are
+    handled by pydantic's default configuration.
+
+    Attributes:
+        name: Unique skill identifier (1-64 chars, lowercase alphanumeric + hyphens).
+        description: Human-readable description of the skill.
+        license: Optional SPDX license identifier.
+        compatibility: Optional compatibility information.
+        metadata: Optional additional metadata as string key-value pairs.
+        allowed_tools: Optional list of tools the skill may use. Populated
+            from the hyphenated ``allowed-tools`` frontmatter key; the Python
+            spelling ``allowed_tools`` is still accepted for backward
+            compatibility. A plain string value is split on whitespace.
+    """
+
+    name: str
+    description: str
+    license: str | None = None
+    compatibility: str | None = None
+    metadata: dict[str, str] | None = None
+    # Without the alias a frontmatter key written as ``allowed-tools`` never
+    # reaches this field: it does not match the attribute name and is treated
+    # as an unknown extra key.
+    # NOTE(review): the Agent Skills spec spells the key ``allowed-tools`` and
+    # describes it as a space-delimited string - confirm against the spec
+    # revision this package targets.
+    allowed_tools: list[str] | None = Field(default=None, alias="allowed-tools")
+
+    @field_validator("name")
+    @classmethod
+    def check_name(cls, v: str) -> str:
+        """Validate skill name against spec constraints."""
+        return validate_skill_name(v)
+
+    @field_validator("allowed_tools", mode="before")
+    @classmethod
+    def split_allowed_tools(cls, v: object) -> object:
+        """Accept a whitespace-delimited string as well as a YAML list.
+
+        Runs before pydantic's own list validation, so anything that is not a
+        string is passed through untouched and validated as ``list[str]``.
+        """
+        if isinstance(v, str):
+            return v.split()
+        return v
+
+
+class Skill(BaseModel):
+    """An Agent Skill together with how much of it has been loaded so far.
+
+    A skill starts out with only its frontmatter; the body text and the
+    resource catalog are filled in as the skill is promoted to higher
+    disclosure levels.
+
+    Attributes:
+        frontmatter: Parsed YAML frontmatter.
+        instructions: SKILL.md body text, or ``None`` while not loaded.
+        path: Filesystem path to the skill directory.
+        disclosure_level: How much of the skill is currently loaded.
+        resource_files: Resource file listing keyed by directory name, or
+            ``None`` while not cataloged.
+    """
+
+    frontmatter: SkillFrontmatter
+    instructions: str | None = None
+    path: Path
+    disclosure_level: DisclosureLevel = DisclosureLevel.METADATA
+    resource_files: dict[str, list[str]] | None = None
+
+    @property
+    def name(self) -> str:
+        """Name declared in the frontmatter."""
+        return self.frontmatter.name
+
+    @property
+    def description(self) -> str:
+        """Description declared in the frontmatter."""
+        return self.frontmatter.description
+
+    @property
+    def scripts_dir(self) -> Path:
+        """Location of the ``scripts`` directory (it may not exist)."""
+        return self.path.joinpath("scripts")
+
+    @property
+    def references_dir(self) -> Path:
+        """Location of the ``references`` directory (it may not exist)."""
+        return self.path.joinpath("references")
+
+    @property
+    def assets_dir(self) -> Path:
+        """Location of the ``assets`` directory (it may not exist)."""
+        return self.path.joinpath("assets")
+
+    def has_scripts(self) -> bool:
+        """Return True when ``scripts`` exists on disk as a directory."""
+        return self.scripts_dir.is_dir()
+
+    def has_references(self) -> bool:
+        """Return True when ``references`` exists on disk as a directory."""
+        return self.references_dir.is_dir()
+
+    def has_assets(self) -> bool:
+        """Return True when ``assets`` exists on disk as a directory."""
+        return self.assets_dir.is_dir()
+
+    def with_disclosure_level(
+        self,
+        level: DisclosureLevel,
+        instructions: str | None = None,
+        resource_files: dict[str, list[str]] | None = None,
+    ) -> Skill:
+        """Build a copy of this skill at another disclosure level.
+
+        The current instance is left untouched. An argument left as ``None``
+        means "keep what this skill already has".
+
+        Args:
+            level: Disclosure level of the returned skill.
+            instructions: Replacement body text, if any.
+            resource_files: Replacement resource catalog, if any.
+
+        Returns:
+            A new Skill sharing this skill's frontmatter and path.
+        """
+        body_text = self.instructions if instructions is None else instructions
+        catalog = self.resource_files if resource_files is None else resource_files
+        return Skill(
+            frontmatter=self.frontmatter,
+            instructions=body_text,
+            path=self.path,
+            disclosure_level=level,
+            resource_files=catalog,
+        )
--- a/lib/crewai/src/crewai/skills/parser.py
+++ b/lib/crewai/src/crewai/skills/parser.py
@@ -0,0 +1,160 @@
+"""SKILL.md file parsing for the Agent Skills standard.
+
+Parses YAML frontmatter and markdown body from SKILL.md files,
+and provides progressive loading functions for skill data.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+from crewai.skills.models import DisclosureLevel, Skill, SkillFrontmatter
+from crewai.skills.validation import validate_directory_name
+
+
+# File name that the loaders in this module join onto a skill directory to
+# locate the skill's definition file.
+SKILL_FILENAME: str = "SKILL.md"
+
+
+class SkillParseError(ValueError):
+    """Error raised when SKILL.md parsing fails.
+
+    Subclasses ``ValueError``, so handlers written as ``except ValueError``
+    catch it as well.
+    """
+
+
+def parse_frontmatter(content: str) -> tuple[dict[str, Any], str]:
+    """Split SKILL.md content into frontmatter dict and body text.
+
+    The frontmatter is the text between an opening ``---`` line (the first
+    line of the file) and the next line that consists solely of ``---``.
+    Delimiters are matched as whole lines, so a ``---`` sequence that merely
+    occurs inside a YAML value does not end the frontmatter early.
+
+    Args:
+        content: Raw SKILL.md file content. A leading UTF-8 byte order mark
+            is ignored.
+
+    Returns:
+        Tuple of (frontmatter dict, body text). Both the YAML source and the
+        body have surrounding whitespace stripped.
+
+    Raises:
+        SkillParseError: If frontmatter delimiters are missing, the YAML is
+            invalid, or the YAML is not a mapping.
+    """
+    # Split on "\n" only (not str.splitlines) so that re-joining with "\n"
+    # reproduces the body exactly; a "\r" left over from CRLF files is
+    # dropped by rstrip() when a line is compared against the delimiter.
+    lines = content.removeprefix("\ufeff").split("\n")
+
+    if lines[0].rstrip() != "---":
+        msg = "SKILL.md must start with '---' frontmatter delimiter"
+        raise SkillParseError(msg)
+
+    end_idx = next(
+        (
+            idx
+            for idx, line in enumerate(lines[1:], start=1)
+            if line.rstrip() == "---"
+        ),
+        None,
+    )
+    if end_idx is None:
+        msg = "SKILL.md missing closing '---' frontmatter delimiter"
+        raise SkillParseError(msg)
+
+    yaml_content = "\n".join(lines[1:end_idx]).strip()
+    body = "\n".join(lines[end_idx + 1 :]).strip()
+
+    try:
+        frontmatter = yaml.safe_load(yaml_content)
+    except yaml.YAMLError as e:
+        msg = f"Invalid YAML in frontmatter: {e}"
+        raise SkillParseError(msg) from e
+
+    # An empty frontmatter section loads as None and a bare scalar or list is
+    # not usable either; only a mapping can be turned into fields.
+    if not isinstance(frontmatter, dict):
+        msg = "Frontmatter must be a YAML mapping"
+        raise SkillParseError(msg)
+
+    return frontmatter, body
+
+
+def parse_skill_md(path: Path) -> tuple[SkillFrontmatter, str]:
+    """Load a SKILL.md file from disk and turn it into typed data.
+
+    The file is decoded as UTF-8, split into frontmatter and body, and the
+    frontmatter mapping is passed as keyword arguments to
+    ``SkillFrontmatter``.
+
+    Args:
+        path: Path to the SKILL.md file.
+
+    Returns:
+        Tuple of (SkillFrontmatter, body text).
+
+    Raises:
+        FileNotFoundError: If the file does not exist.
+        SkillParseError: If parsing fails.
+    """
+    raw_text = path.read_text(encoding="utf-8")
+    fields, markdown_body = parse_frontmatter(raw_text)
+    return SkillFrontmatter(**fields), markdown_body
+
+
+def load_skill_metadata(skill_dir: Path) -> Skill:
+    """Create a METADATA-level skill from a skill directory.
+
+    SKILL.md is read and parsed, but only its frontmatter is kept; the body
+    text is discarded. The directory name must equal the declared skill name.
+
+    Args:
+        skill_dir: Path to the skill directory.
+
+    Returns:
+        Skill instance at METADATA level.
+
+    Raises:
+        FileNotFoundError: If SKILL.md is missing.
+        SkillParseError: If parsing fails.
+        ValueError: If directory name doesn't match skill name.
+    """
+    parsed_frontmatter, _ = parse_skill_md(skill_dir / SKILL_FILENAME)
+    validate_directory_name(skill_dir, parsed_frontmatter.name)
+    return Skill(
+        frontmatter=parsed_frontmatter,
+        path=skill_dir,
+        disclosure_level=DisclosureLevel.METADATA,
+    )
+
+
+def load_skill_instructions(skill: Skill) -> Skill:
+    """Return the skill at INSTRUCTIONS disclosure level.
+
+    A skill already at INSTRUCTIONS level or higher is returned unchanged.
+    Otherwise SKILL.md is read and parsed again from the skill's directory
+    and its body becomes the instructions of a new Skill.
+
+    Args:
+        skill: Skill at METADATA level.
+
+    Returns:
+        New Skill instance at INSTRUCTIONS level.
+    """
+    if skill.disclosure_level < DisclosureLevel.INSTRUCTIONS:
+        _frontmatter, body_text = parse_skill_md(skill.path / SKILL_FILENAME)
+        return skill.with_disclosure_level(
+            level=DisclosureLevel.INSTRUCTIONS,
+            instructions=body_text,
+        )
+
+    return skill
+
+
+def load_skill_resources(skill: Skill) -> Skill:
+    """Return the skill at RESOURCES disclosure level.
+
+    A skill already at RESOURCES level is returned unchanged. A skill below
+    INSTRUCTIONS level is promoted to INSTRUCTIONS first. The ``scripts``,
+    ``references`` and ``assets`` directories that exist are then walked
+    recursively; each contributes a sorted list of file paths relative to
+    that directory. Directories that do not exist get no entry.
+
+    Args:
+        skill: Skill at any level.
+
+    Returns:
+        New Skill instance at RESOURCES level.
+    """
+    if skill.disclosure_level >= DisclosureLevel.RESOURCES:
+        return skill
+
+    base = (
+        load_skill_instructions(skill)
+        if skill.disclosure_level < DisclosureLevel.INSTRUCTIONS
+        else skill
+    )
+
+    candidates = {
+        label: base.path / label for label in ("scripts", "references", "assets")
+    }
+    catalog: dict[str, list[str]] = {
+        label: sorted(
+            str(entry.relative_to(root))
+            for entry in root.rglob("*")
+            if entry.is_file()
+        )
+        for label, root in candidates.items()
+        if root.is_dir()
+    }
+
+    return base.with_disclosure_level(
+        level=DisclosureLevel.RESOURCES,
+        instructions=base.instructions,
+        resource_files=catalog,
+    )
--- a/lib/crewai/src/crewai/skills/validation.py
+++ b/lib/crewai/src/crewai/skills/validation.py
@@ -0,0 +1,67 @@
+"""Validation functions for Agent Skills specification constraints.
+
+Validates skill names and directory structures per the Agent Skills standard.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+import re
+
+
+MAX_SKILL_NAME_LENGTH: int = 64
+MIN_SKILL_NAME_LENGTH: int = 1
+
+# One or more lowercase alphanumeric runs joined by single hyphens.
+_SKILL_NAME_PATTERN: re.Pattern[str] = re.compile(r"^[a-z0-9]+(?:-[a-z0-9]+)*$")
+
+
+def validate_skill_name(name: str) -> str:
+    """Validate a skill name against the Agent Skills specification.
+
+    Names must be 1-64 characters, lowercase alphanumeric with hyphens,
+    no leading/trailing hyphens, and no consecutive hyphens. The whole
+    string must conform; in particular a trailing newline is rejected.
+
+    Args:
+        name: The skill name to validate.
+
+    Returns:
+        The validated skill name, unchanged.
+
+    Raises:
+        ValueError: If the name violates any constraint.
+    """
+    if len(name) < MIN_SKILL_NAME_LENGTH:
+        msg = "Skill name must not be empty"
+        raise ValueError(msg)
+
+    if len(name) > MAX_SKILL_NAME_LENGTH:
+        msg = (
+            f"Skill name must be at most {MAX_SKILL_NAME_LENGTH} characters, "
+            f"got {len(name)}"
+        )
+        raise ValueError(msg)
+
+    # fullmatch, not match: "$" also matches just before a trailing newline,
+    # so match() would accept "my-skill\n" (e.g. from a YAML block scalar).
+    if not _SKILL_NAME_PATTERN.fullmatch(name):
+        msg = (
+            f"Invalid skill name '{name}'. Names must be lowercase alphanumeric "
+            f"with single hyphens, no leading/trailing hyphens."
+        )
+        raise ValueError(msg)
+
+    return name
+
+
+def validate_directory_name(skill_dir: Path, skill_name: str) -> None:
+    """Ensure a skill directory is named after the skill it contains.
+
+    Only the final path component of ``skill_dir`` is compared, and the
+    comparison is exact (case-sensitive).
+
+    Args:
+        skill_dir: Path to the skill directory.
+        skill_name: The declared skill name from frontmatter.
+
+    Raises:
+        ValueError: If the directory name does not match the skill name.
+    """
+    actual_name = skill_dir.name
+    if actual_name == skill_name:
+        return
+
+    msg = f"Directory name '{actual_name}' does not match skill name '{skill_name}'"
+    raise ValueError(msg)