mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-07-05 15:09:22 +00:00
* Fix symlink path traversal in skill archive extraction `_safe_extractall` (the Python < 3.12 fallback used by `crewai skills` archive unpacking) validated each member's *name* against the destination but never validated symlink/hardlink *targets*. A malicious skill tarball could plant a symlink escaping the destination (e.g. `link -> /home/user/.ssh`) followed by a regular member written through it (`link/authorized_keys`), escaping `dest` even though every member name resolves inside it — the classic symlink-extraction traversal. The 3.12+ path (`extractall(..., filter="data")`) already blocks this; the fallback now mirrors it by rejecting absolute link targets and any link target that resolves outside the destination directory. Adds regression tests covering absolute and relative escaping symlinks plus benign in-tree symlinks and ordinary archives. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> * Harden skill cache archive extraction * Reject special skill archive members --------- Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
172 lines
6.0 KiB
Python
172 lines
6.0 KiB
Python
"""Cache manager for registry-downloaded skills.
|
|
|
|
Manages ~/.crewai/skills/{org}/{name}/ as the global skill cache.
|
|
One version is stored per skill (last install wins).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from datetime import datetime, timezone
|
|
import json
|
|
import logging
|
|
import os
|
|
from pathlib import Path
|
|
import tarfile
|
|
from typing import TypedDict
|
|
import zipfile
|
|
|
|
|
|
# Module-level logger, named after this module.
_logger = logging.getLogger(__name__)

# Default cache location: ~/.crewai/skills, computed from the current user's
# home directory when this module is imported.
_CACHE_ROOT = Path.home() / ".crewai" / "skills"
# Name of the metadata file written into every cached skill directory; its
# presence is what marks a directory as a cache entry.
_META_FILENAME = ".crewai_meta.json"
|
|
|
|
|
|
class SkillMetadata(TypedDict):
    """Metadata record written next to a cached skill as JSON."""

    # Organisation slug the skill was stored under.
    org: str
    # Skill name.
    name: str
    # Version string supplied at store time, or None if unknown.
    version: str | None
    # ISO-8601 timestamp (UTC) taken when the skill was stored.
    installed_at: str
|
|
|
|
|
|
class SkillCacheManager:
    """Manages the global skill cache at ~/.crewai/skills/.

    Each skill is unpacked into ``<cache_root>/<org>/<name>/`` together with a
    ``.crewai_meta.json`` file. A directory only counts as a cache entry once
    that metadata file exists.
    """

    def __init__(self, cache_root: Path | None = None) -> None:
        """Create a manager rooted at *cache_root* (default: ~/.crewai/skills)."""
        self._root = cache_root or _CACHE_ROOT

    def _skill_dir(self, org: str, name: str) -> Path:
        """Return the cache directory for ``org/name``.

        Both identifiers must be a single, plain path component. Without this
        check a value such as ``".."`` or an absolute path would make the
        returned directory point outside the cache root, and ``store`` /
        ``invalidate`` would then ``rmtree`` an arbitrary directory.

        Raises:
            ValueError: If *org* or *name* is empty, ``"."``, ``".."``, or
                contains a path separator / drive / root.
        """
        for label, part in (("org", org), ("name", name)):
            # Path(part).name differs from part whenever part carries a
            # separator, a root or a drive, i.e. is more than one component.
            if part in ("", ".", "..") or Path(part).name != part:
                raise ValueError(f"Invalid skill {label}: {part!r}")
        return self._root / org / name

    def get_cached_path(self, org: str, name: str) -> Path | None:
        """Return the cached skill directory path if it exists, else None.

        Raises:
            ValueError: If *org* or *name* is not a single path component.
        """
        skill_dir = self._skill_dir(org, name)
        meta_file = skill_dir / _META_FILENAME
        if skill_dir.is_dir() and meta_file.exists():
            return skill_dir
        return None

    def store(
        self, org: str, name: str, version: str | None, archive_bytes: bytes
    ) -> Path:
        """Unpack an archive into the cache and write metadata.

        The archive is first read as a gzip-compressed tarball, extracted with
        ``filter="data"`` where the running Python supports it and with
        ``_safe_extractall`` otherwise. If tar handling fails with a
        ``tarfile.TarError`` the bytes are retried as a ZIP archive through
        ``_safe_extract_zip``.

        Any previous copy of the skill is removed first. If unpacking or
        writing the metadata fails, the partially populated directory is
        removed again before the error propagates, so a rejected archive can
        never be mistaken for a valid cache entry.

        Args:
            org: Organisation slug.
            name: Skill name.
            version: Semantic version string, or None if unknown.
            archive_bytes: Raw bytes of a .tar.gz (or ZIP) archive.

        Returns:
            Path to the stored skill directory.

        Raises:
            ValueError: If *org* or *name* is not a single path component, or
                if a fallback extractor blocks an archive member.
            zipfile.BadZipFile: If the bytes are neither a usable tarball nor
                a ZIP archive.
        """
        import io
        import shutil

        skill_dir = self._skill_dir(org, name)
        if skill_dir.exists():
            shutil.rmtree(skill_dir)
        skill_dir.mkdir(parents=True, exist_ok=True)

        try:
            try:
                with tarfile.open(
                    fileobj=io.BytesIO(archive_bytes), mode="r:gz"
                ) as tf:
                    try:
                        tf.extractall(skill_dir, filter="data")
                    except TypeError:
                        # Python without extraction filters: use the
                        # hand-rolled validator instead.
                        _safe_extractall(tf, skill_dir)
            except tarfile.TarError:
                with zipfile.ZipFile(io.BytesIO(archive_bytes)) as zf:
                    _safe_extract_zip(zf, skill_dir)

            meta: SkillMetadata = {
                "org": org,
                "name": name,
                "version": version,
                "installed_at": datetime.now(tz=timezone.utc).isoformat(),
            }
            (skill_dir / _META_FILENAME).write_text(
                json.dumps(meta, indent=2), encoding="utf-8"
            )
        except BaseException:
            # Never leave a half-extracted tree behind: the archive may carry
            # its own metadata file, which would make the leftovers look like
            # a complete cache entry.
            shutil.rmtree(skill_dir, ignore_errors=True)
            raise
        return skill_dir

    def list_cached(self) -> list[SkillMetadata]:
        """Return metadata for every cached skill.

        Entries are visited in sorted order (organisation, then skill).
        Metadata files that cannot be read, are not valid JSON, or do not hold
        a JSON object are logged at debug level and skipped.
        """
        results: list[SkillMetadata] = []
        # is_dir() rather than exists(): a stray file at the root location
        # must not make iterdir() raise.
        if not self._root.is_dir():
            return results
        for org_dir in sorted(self._root.iterdir()):
            if not org_dir.is_dir():
                continue
            for skill_dir in sorted(org_dir.iterdir()):
                meta_file = skill_dir / _META_FILENAME
                if not meta_file.exists():
                    continue
                try:
                    payload = json.loads(meta_file.read_text(encoding="utf-8"))
                except (OSError, ValueError):
                    # ValueError covers both JSONDecodeError and
                    # UnicodeDecodeError; OSError covers unreadable files.
                    _logger.debug(
                        "Skipping malformed cache entry: %s",
                        meta_file,
                        exc_info=True,
                    )
                    continue
                if not isinstance(payload, dict):
                    _logger.debug("Skipping malformed cache entry: %s", meta_file)
                    continue
                results.append(payload)
        return results

    def invalidate(self, org: str, name: str) -> bool:
        """Remove a cached skill.

        Returns:
            True if the cache entry existed and was removed, False otherwise.

        Raises:
            ValueError: If *org* or *name* is not a single path component.
        """
        skill_dir = self._skill_dir(org, name)
        if skill_dir.exists():
            import shutil

            shutil.rmtree(skill_dir)
            return True
        return False
|
|
|
|
|
|
def _safe_extractall(tf: tarfile.TarFile, dest: Path) -> None:
    """Path-traversal-safe extraction for Python versions without tar filters.

    Validates both the member's own path and, for symlink/hardlink members,
    the link target, and rejects anything that is not a regular file,
    directory, symlink or hardlink.

    Validation happens in three stages:

    1. A pre-flight pass over every member before anything is written, so an
       obviously hostile archive leaves ``dest`` untouched.
    2. A second check immediately before each member is extracted. Paths are
       resolved against the real filesystem, so links created by earlier
       members are followed. A purely up-front check cannot see that
       ``esc -> a/up/..`` escapes once ``a/up -> ..`` exists on disk, because
       both targets normalise to a location inside ``dest`` while it is empty.
    3. A final re-check of every link member, catching links whose target was
       still dangling when they were created and only became escaping later.

    Directory modes and mtimes are applied after all members are written, in
    the same deepest-first order ``TarFile.extractall`` uses, so a read-only
    directory in the archive does not block extraction of its children.

    Args:
        tf: Open tar archive to extract.
        dest: Directory to extract into.

    Raises:
        ValueError: If a member path or link target resolves outside ``dest``
            (or cannot be resolved, e.g. a symlink loop), if a link target is
            absolute, or if a member has an unsupported type. When raised by
            stage 2 or 3, members extracted so far remain under ``dest``;
            callers should discard the directory.
    """
    dest_resolved = dest.resolve()
    members = tf.getmembers()

    def _require_inside(candidate: Path, message: str) -> None:
        # resolve() follows whatever links already exist on disk, which is
        # what makes the per-member re-check effective.
        try:
            resolved = candidate.resolve()
        except (OSError, RuntimeError) as exc:  # e.g. symlink loop
            raise ValueError(message) from exc
        if not resolved.is_relative_to(dest_resolved):
            raise ValueError(message)

    def _check(member: tarfile.TarInfo) -> None:
        _require_inside(
            dest / member.name,
            f"Blocked path traversal attempt: {member.name!r}",
        )
        if not (
            member.isfile() or member.isdir() or member.issym() or member.islnk()
        ):
            raise ValueError(f"Blocked unsupported tar member: {member.name!r}")
        if not (member.issym() or member.islnk()):
            return
        link_target = member.linkname
        message = (
            f"Blocked link target escaping destination: "
            f"{member.name!r} -> {link_target!r}"
        )
        if os.path.isabs(link_target):
            raise ValueError(message)
        # Hardlink targets are relative to the archive root; symlink targets
        # are relative to the directory containing the link.
        anchor = dest if member.islnk() else (dest / member.name).parent
        _require_inside(anchor / link_target, message)

    # Stage 1: reject before touching the filesystem.
    for member in members:
        _check(member)

    # Stage 2: extract one member at a time, re-validating against what is
    # actually on disk by now.
    for member in members:
        _check(member)
        tf.extract(member, dest, set_attrs=not member.isdir())

    # Stage 3: links may have become escaping after later links were created.
    for member in members:
        if member.issym() or member.islnk():
            _check(member)

    # Deferred directory attributes, deepest paths first.
    directories = sorted(
        (m for m in members if m.isdir()), key=lambda m: m.name, reverse=True
    )
    for member in directories:
        target = dest / member.name
        if target.is_symlink() or not target.is_dir():
            continue
        try:
            os.chmod(target, member.mode)
            os.utime(target, (member.mtime, member.mtime))
        except OSError:
            # Best effort only: failing to restore attributes must not fail
            # an otherwise complete extraction.
            pass
|
|
|
|
|
|
def _safe_extract_zip(zf: zipfile.ZipFile, dest: Path) -> None:
    """Extract *zf* into *dest*, refusing archives with escaping entry names.

    Every entry name is joined onto *dest* and resolved; if any of them lands
    outside the resolved destination, a ``ValueError`` naming the first such
    entry is raised and nothing is extracted.
    """
    root = dest.resolve()
    offender = next(
        (
            entry
            for entry in zf.namelist()
            if not (dest / entry).resolve().is_relative_to(root)
        ),
        None,
    )
    if offender is not None:
        raise ValueError(f"Blocked path traversal attempt: {offender!r}")
    zf.extractall(dest)  # noqa: S202
|