Files
crewAI/lib/cli/tests/skills/test_safe_extract.py
Rip&Tear fac3e3579b Fix symlink path traversal in skill archive extraction (#6235)
* Fix symlink path traversal in skill archive extraction

`_safe_extractall` (the Python < 3.12 fallback used by `crewai skills`
archive unpacking) validated each member's *name* against the destination
but never validated symlink/hardlink *targets*. A malicious skill tarball
could plant a symlink escaping the destination (e.g. `link -> /home/user/.ssh`)
followed by a regular member written through it (`link/authorized_keys`),
escaping `dest` even though every member name resolves inside it — the
classic symlink-extraction traversal.

The 3.12+ path (`extractall(..., filter="data")`) already blocks this; the
fallback now mirrors it by rejecting absolute link targets and any link
target that resolves outside the destination directory.

Adds regression tests covering absolute and relative escaping symlinks plus
benign in-tree symlinks and ordinary archives.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>

* Harden skill cache archive extraction

* Reject special skill archive members

---------

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-24 08:50:41 -07:00

141 lines
4.6 KiB
Python

"""Regression tests for path-traversal-safe archive extraction.
Guards against symlink/hardlink-based path traversal in the fallback used on
Python versions without tarfile extraction filters. The filtered path relies on
`tarfile.extractall(..., filter="data")`; the fallback must provide the same
protection by validating link targets, not just member names.
"""
from __future__ import annotations
import io
import tarfile
from pathlib import Path
import pytest
from crewai_cli.experimental.skills.main import _safe_extractall
def _tar_from_members(build) -> tarfile.TarFile:
"""Build an in-memory tar archive via `build(tf)` and return it for reading."""
buf = io.BytesIO()
with tarfile.open(fileobj=buf, mode="w") as tf:
build(tf)
buf.seek(0)
return tarfile.open(fileobj=buf, mode="r")
def test_blocks_symlink_escaping_destination(tmp_path: Path) -> None:
"""A symlink whose target escapes dest, plus a file written through it,
must be rejected before anything is extracted."""
outside = tmp_path / "outside"
outside.mkdir()
dest = tmp_path / "dest"
dest.mkdir()
def build(tf: tarfile.TarFile) -> None:
link = tarfile.TarInfo("link")
link.type = tarfile.SYMTYPE
link.linkname = str(outside) # absolute path outside dest
tf.addfile(link)
payload = b"pwned"
info = tarfile.TarInfo("link/evil.txt")
info.size = len(payload)
tf.addfile(info, io.BytesIO(payload))
with _tar_from_members(build) as tf:
with pytest.raises(ValueError, match="escaping destination"):
_safe_extractall(tf, dest)
assert not (outside / "evil.txt").exists()
def test_blocks_relative_symlink_escaping_destination(tmp_path: Path) -> None:
"""A relative symlink (../..) that escapes dest is also rejected."""
dest = tmp_path / "dest"
dest.mkdir()
def build(tf: tarfile.TarFile) -> None:
link = tarfile.TarInfo("sub/link")
link.type = tarfile.SYMTYPE
link.linkname = "../../outside" # escapes dest from sub/
tf.addfile(link)
with _tar_from_members(build) as tf:
with pytest.raises(ValueError, match="escaping destination"):
_safe_extractall(tf, dest)
def test_blocks_hardlink_escaping_destination(tmp_path: Path) -> None:
"""A hardlink whose target escapes dest is rejected."""
dest = tmp_path / "dest"
dest.mkdir()
def build(tf: tarfile.TarFile) -> None:
link = tarfile.TarInfo("escape")
link.type = tarfile.LNKTYPE
link.linkname = "../outside.txt" # escapes archive root
tf.addfile(link)
with _tar_from_members(build) as tf:
with pytest.raises(ValueError, match="escaping destination"):
_safe_extractall(tf, dest)
def test_blocks_special_tar_member(tmp_path: Path) -> None:
"""Special tar members such as FIFOs are rejected."""
dest = tmp_path / "dest"
dest.mkdir()
def build(tf: tarfile.TarFile) -> None:
fifo = tarfile.TarInfo("pipe")
fifo.type = tarfile.FIFOTYPE
tf.addfile(fifo)
with _tar_from_members(build) as tf:
with pytest.raises(ValueError, match="unsupported tar member"):
_safe_extractall(tf, dest)
def test_allows_benign_relative_symlink(tmp_path: Path) -> None:
"""A symlink that stays within dest is permitted."""
dest = tmp_path / "dest"
dest.mkdir()
def build(tf: tarfile.TarFile) -> None:
payload = b"hi"
info = tarfile.TarInfo("real.txt")
info.size = len(payload)
tf.addfile(info, io.BytesIO(payload))
link = tarfile.TarInfo("alias.txt")
link.type = tarfile.SYMTYPE
link.linkname = "real.txt" # stays inside dest
tf.addfile(link)
with _tar_from_members(build) as tf:
_safe_extractall(tf, dest)
assert (dest / "real.txt").read_bytes() == b"hi"
assert (dest / "alias.txt").is_symlink()
assert (dest / "alias.txt").readlink() == Path("real.txt")
def test_allows_benign_archive(tmp_path: Path) -> None:
"""An ordinary archive of regular files extracts correctly."""
dest = tmp_path / "dest"
dest.mkdir()
def build(tf: tarfile.TarFile) -> None:
for name, body in (("SKILL.md", b"# skill"), ("scripts/run.py", b"print(1)")):
payload = body
info = tarfile.TarInfo(name)
info.size = len(payload)
tf.addfile(info, io.BytesIO(payload))
with _tar_from_members(build) as tf:
_safe_extractall(tf, dest)
assert (dest / "SKILL.md").read_bytes() == b"# skill"
assert (dest / "scripts" / "run.py").read_bytes() == b"print(1)"