mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-06-23 17:18:09 +00:00
Compare commits
6 Commits
main
...
fix-skill-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a3dbc29ec9 | ||
|
|
09bd60877d | ||
|
|
72db78a14e | ||
|
|
099201cc92 | ||
|
|
589e590dd7 | ||
|
|
32d2da1bfb |
@@ -378,12 +378,40 @@ class SkillCommand(BaseCommand, PlusAPIMixin):
|
||||
|
||||
|
||||
def _safe_extractall(tf: tarfile.TarFile, dest: Path) -> None:
|
||||
"""Path-traversal-safe extraction for Python < 3.12."""
|
||||
"""Path-traversal-safe extraction for Python < 3.12.
|
||||
|
||||
Validates both the member's own path and, for symlink/hardlink members,
|
||||
the link target. Without the link-target check a malicious archive can
|
||||
plant a symlink that escapes ``dest`` (e.g. ``link -> /home/user/.ssh``)
|
||||
followed by a regular member written *through* that link
|
||||
(``link/authorized_keys``), escaping ``dest`` even though every member
|
||||
name resolves inside it. This mirrors the protection that
|
||||
``tarfile.extractall(..., filter="data")`` provides on Python >= 3.12.
|
||||
"""
|
||||
dest_resolved = dest.resolve()
|
||||
for member in tf.getmembers():
|
||||
member_path = (dest / member.name).resolve()
|
||||
if not member_path.is_relative_to(dest_resolved):
|
||||
raise ValueError(f"Blocked path traversal attempt: {member.name!r}")
|
||||
if member.ischr() or member.isblk() or member.isfifo():
|
||||
raise ValueError(f"Blocked special file type in archive: {member.name!r}")
|
||||
if member.issym() or member.islnk():
|
||||
link_target = member.linkname
|
||||
# Absolute link targets always escape the destination.
|
||||
if os.path.isabs(link_target):
|
||||
raise ValueError(
|
||||
f"Blocked link target escaping destination: "
|
||||
f"{member.name!r} -> {link_target!r}"
|
||||
)
|
||||
# Hardlink names are relative to the archive root; symlink
|
||||
# targets are relative to the member's own directory.
|
||||
anchor = dest if member.islnk() else (dest / member.name).parent
|
||||
resolved_target = (anchor / link_target).resolve()
|
||||
if not resolved_target.is_relative_to(dest_resolved):
|
||||
raise ValueError(
|
||||
f"Blocked link target escaping destination: "
|
||||
f"{member.name!r} -> {link_target!r}"
|
||||
)
|
||||
tf.extractall(dest) # noqa: S202
|
||||
|
||||
|
||||
|
||||
107
lib/cli/tests/skills/test_safe_extract.py
Normal file
107
lib/cli/tests/skills/test_safe_extract.py
Normal file
@@ -0,0 +1,107 @@
|
||||
"""Regression tests for path-traversal-safe archive extraction.
|
||||
|
||||
Guards against symlink/hardlink-based path traversal in the Python < 3.12
|
||||
extraction fallback (`_safe_extractall`). The 3.12+ path relies on
|
||||
`tarfile.extractall(..., filter="data")`; the fallback must provide the same
|
||||
protection by validating link targets, not just member names.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import tarfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from crewai_cli.experimental.skills.main import _safe_extractall
|
||||
|
||||
|
||||
def _tar_from_members(build) -> tarfile.TarFile:
|
||||
"""Build an in-memory tar archive via `build(tf)` and return it for reading."""
|
||||
buf = io.BytesIO()
|
||||
with tarfile.open(fileobj=buf, mode="w") as tf:
|
||||
build(tf)
|
||||
buf.seek(0)
|
||||
return tarfile.open(fileobj=buf, mode="r")
|
||||
|
||||
|
||||
def test_blocks_symlink_escaping_destination(tmp_path: Path) -> None:
|
||||
"""A symlink whose target escapes dest, plus a file written through it,
|
||||
must be rejected before anything is extracted."""
|
||||
outside = tmp_path / "outside"
|
||||
outside.mkdir()
|
||||
dest = tmp_path / "dest"
|
||||
dest.mkdir()
|
||||
|
||||
def build(tf: tarfile.TarFile) -> None:
|
||||
link = tarfile.TarInfo("link")
|
||||
link.type = tarfile.SYMTYPE
|
||||
link.linkname = str(outside) # absolute path outside dest
|
||||
tf.addfile(link)
|
||||
payload = b"pwned"
|
||||
info = tarfile.TarInfo("link/evil.txt")
|
||||
info.size = len(payload)
|
||||
tf.addfile(info, io.BytesIO(payload))
|
||||
|
||||
with _tar_from_members(build) as tf:
|
||||
with pytest.raises(ValueError, match="escaping destination"):
|
||||
_safe_extractall(tf, dest)
|
||||
|
||||
assert not (outside / "evil.txt").exists()
|
||||
|
||||
|
||||
def test_blocks_relative_symlink_escaping_destination(tmp_path: Path) -> None:
|
||||
"""A relative symlink (../..) that escapes dest is also rejected."""
|
||||
dest = tmp_path / "dest"
|
||||
dest.mkdir()
|
||||
|
||||
def build(tf: tarfile.TarFile) -> None:
|
||||
link = tarfile.TarInfo("sub/link")
|
||||
link.type = tarfile.SYMTYPE
|
||||
link.linkname = "../../outside" # escapes dest from sub/
|
||||
tf.addfile(link)
|
||||
|
||||
with _tar_from_members(build) as tf:
|
||||
with pytest.raises(ValueError, match="escaping destination"):
|
||||
_safe_extractall(tf, dest)
|
||||
|
||||
|
||||
def test_allows_benign_relative_symlink(tmp_path: Path) -> None:
|
||||
"""A symlink that stays within dest is permitted."""
|
||||
dest = tmp_path / "dest"
|
||||
dest.mkdir()
|
||||
|
||||
def build(tf: tarfile.TarFile) -> None:
|
||||
payload = b"hi"
|
||||
info = tarfile.TarInfo("real.txt")
|
||||
info.size = len(payload)
|
||||
tf.addfile(info, io.BytesIO(payload))
|
||||
link = tarfile.TarInfo("alias.txt")
|
||||
link.type = tarfile.SYMTYPE
|
||||
link.linkname = "real.txt" # stays inside dest
|
||||
tf.addfile(link)
|
||||
|
||||
with _tar_from_members(build) as tf:
|
||||
_safe_extractall(tf, dest)
|
||||
|
||||
assert (dest / "real.txt").read_bytes() == b"hi"
|
||||
|
||||
|
||||
def test_allows_benign_archive(tmp_path: Path) -> None:
|
||||
"""An ordinary archive of regular files extracts correctly."""
|
||||
dest = tmp_path / "dest"
|
||||
dest.mkdir()
|
||||
|
||||
def build(tf: tarfile.TarFile) -> None:
|
||||
for name, body in (("SKILL.md", b"# skill"), ("scripts/run.py", b"print(1)")):
|
||||
payload = body
|
||||
info = tarfile.TarInfo(name)
|
||||
info.size = len(payload)
|
||||
tf.addfile(info, io.BytesIO(payload))
|
||||
|
||||
with _tar_from_members(build) as tf:
|
||||
_safe_extractall(tf, dest)
|
||||
|
||||
assert (dest / "SKILL.md").read_bytes() == b"# skill"
|
||||
assert (dest / "scripts" / "run.py").read_bytes() == b"print(1)"
|
||||
@@ -9,6 +9,7 @@ from __future__ import annotations
|
||||
from datetime import datetime, timezone
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
import tarfile
|
||||
from typing import TypedDict
|
||||
@@ -127,12 +128,40 @@ class SkillCacheManager:
|
||||
|
||||
|
||||
def _safe_extractall(tf: tarfile.TarFile, dest: Path) -> None:
|
||||
"""Path-traversal-safe extraction for Python < 3.12."""
|
||||
"""Path-traversal-safe extraction for Python < 3.12.
|
||||
|
||||
Validates both the member's own path and, for symlink/hardlink members,
|
||||
the link target. Without the link-target check a malicious archive can
|
||||
plant a symlink that escapes ``dest`` (e.g. ``link -> /home/user/.ssh``)
|
||||
followed by a regular member written *through* that link
|
||||
(``link/authorized_keys``), escaping ``dest`` even though every member
|
||||
name resolves inside it. This mirrors the protection that
|
||||
``tarfile.extractall(..., filter="data")`` provides on Python >= 3.12.
|
||||
"""
|
||||
dest_resolved = dest.resolve()
|
||||
for member in tf.getmembers():
|
||||
member_path = (dest / member.name).resolve()
|
||||
if not member_path.is_relative_to(dest_resolved):
|
||||
raise ValueError(f"Blocked path traversal attempt: {member.name!r}")
|
||||
if member.ischr() or member.isblk() or member.isfifo():
|
||||
raise ValueError(f"Blocked special file type in archive: {member.name!r}")
|
||||
if member.issym() or member.islnk():
|
||||
link_target = member.linkname
|
||||
# Absolute link targets always escape the destination.
|
||||
if os.path.isabs(link_target):
|
||||
raise ValueError(
|
||||
f"Blocked link target escaping destination: "
|
||||
f"{member.name!r} -> {link_target!r}"
|
||||
)
|
||||
# Hardlink names are relative to the archive root; symlink
|
||||
# targets are relative to the member's own directory.
|
||||
anchor = dest if member.islnk() else (dest / member.name).parent
|
||||
resolved_target = (anchor / link_target).resolve()
|
||||
if not resolved_target.is_relative_to(dest_resolved):
|
||||
raise ValueError(
|
||||
f"Blocked link target escaping destination: "
|
||||
f"{member.name!r} -> {link_target!r}"
|
||||
)
|
||||
tf.extractall(dest) # noqa: S202
|
||||
|
||||
|
||||
|
||||
136
lib/crewai/tests/experimental/skills/test_cache_safe_extract.py
Normal file
136
lib/crewai/tests/experimental/skills/test_cache_safe_extract.py
Normal file
@@ -0,0 +1,136 @@
|
||||
"""Regression tests for path-traversal-safe archive extraction in the cache.
|
||||
|
||||
Guards against symlink/hardlink-based path traversal in the Python < 3.12
|
||||
extraction fallback (`_safe_extractall`) used by `SkillCacheManager.store`.
|
||||
The 3.12+ path relies on `tarfile.extractall(..., filter="data")`; the
|
||||
fallback must provide the same protection by validating link targets, not
|
||||
just member names.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import tarfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from crewai.experimental.skills.cache import _safe_extractall
|
||||
|
||||
|
||||
def _tar_from_members(build) -> tarfile.TarFile:
|
||||
"""Build an in-memory tar archive via `build(tf)` and return it for reading."""
|
||||
buf = io.BytesIO()
|
||||
with tarfile.open(fileobj=buf, mode="w") as tf:
|
||||
build(tf)
|
||||
buf.seek(0)
|
||||
return tarfile.open(fileobj=buf, mode="r")
|
||||
|
||||
|
||||
def test_blocks_symlink_escaping_destination(tmp_path: Path) -> None:
|
||||
"""A symlink whose target escapes dest, plus a file written through it,
|
||||
must be rejected before anything is extracted."""
|
||||
outside = tmp_path / "outside"
|
||||
outside.mkdir()
|
||||
dest = tmp_path / "dest"
|
||||
dest.mkdir()
|
||||
|
||||
def build(tf: tarfile.TarFile) -> None:
|
||||
link = tarfile.TarInfo("link")
|
||||
link.type = tarfile.SYMTYPE
|
||||
link.linkname = str(outside) # absolute path outside dest
|
||||
tf.addfile(link)
|
||||
payload = b"pwned"
|
||||
info = tarfile.TarInfo("link/evil.txt")
|
||||
info.size = len(payload)
|
||||
tf.addfile(info, io.BytesIO(payload))
|
||||
|
||||
with _tar_from_members(build) as tf:
|
||||
with pytest.raises(ValueError, match="escaping destination"):
|
||||
_safe_extractall(tf, dest)
|
||||
|
||||
assert not (outside / "evil.txt").exists()
|
||||
|
||||
|
||||
def test_blocks_relative_symlink_escaping_destination(tmp_path: Path) -> None:
|
||||
"""A relative symlink (../..) that escapes dest is also rejected."""
|
||||
dest = tmp_path / "dest"
|
||||
dest.mkdir()
|
||||
|
||||
def build(tf: tarfile.TarFile) -> None:
|
||||
link = tarfile.TarInfo("sub/link")
|
||||
link.type = tarfile.SYMTYPE
|
||||
link.linkname = "../../outside" # escapes dest from sub/
|
||||
tf.addfile(link)
|
||||
|
||||
with _tar_from_members(build) as tf:
|
||||
with pytest.raises(ValueError, match="escaping destination"):
|
||||
_safe_extractall(tf, dest)
|
||||
|
||||
|
||||
def test_allows_benign_relative_symlink(tmp_path: Path) -> None:
|
||||
"""A symlink that stays within dest is permitted."""
|
||||
dest = tmp_path / "dest"
|
||||
dest.mkdir()
|
||||
|
||||
def build(tf: tarfile.TarFile) -> None:
|
||||
payload = b"hi"
|
||||
info = tarfile.TarInfo("real.txt")
|
||||
info.size = len(payload)
|
||||
tf.addfile(info, io.BytesIO(payload))
|
||||
link = tarfile.TarInfo("alias.txt")
|
||||
link.type = tarfile.SYMTYPE
|
||||
link.linkname = "real.txt" # stays inside dest
|
||||
tf.addfile(link)
|
||||
|
||||
with _tar_from_members(build) as tf:
|
||||
_safe_extractall(tf, dest)
|
||||
|
||||
assert (dest / "real.txt").read_bytes() == b"hi"
|
||||
|
||||
|
||||
def test_allows_benign_archive(tmp_path: Path) -> None:
|
||||
"""An ordinary archive of regular files extracts correctly."""
|
||||
dest = tmp_path / "dest"
|
||||
dest.mkdir()
|
||||
|
||||
def build(tf: tarfile.TarFile) -> None:
|
||||
for name, body in (("SKILL.md", b"# skill"), ("scripts/run.py", b"print(1)")):
|
||||
payload = body
|
||||
info = tarfile.TarInfo(name)
|
||||
info.size = len(payload)
|
||||
tf.addfile(info, io.BytesIO(payload))
|
||||
|
||||
with _tar_from_members(build) as tf:
|
||||
_safe_extractall(tf, dest)
|
||||
|
||||
assert (dest / "SKILL.md").read_bytes() == b"# skill"
|
||||
assert (dest / "scripts" / "run.py").read_bytes() == b"print(1)"
|
||||
|
||||
|
||||
def test_blocks_hardlink_escaping_destination(tmp_path: Path) -> None:
|
||||
"""A hardlink whose target escapes dest must be rejected."""
|
||||
outside = tmp_path / "outside"
|
||||
outside.mkdir()
|
||||
victim = outside / "victim.txt"
|
||||
victim.write_bytes(b"safe")
|
||||
|
||||
dest = tmp_path / "dest"
|
||||
dest.mkdir()
|
||||
|
||||
def build(tf: tarfile.TarFile) -> None:
|
||||
link = tarfile.TarInfo("victim.txt")
|
||||
link.type = tarfile.LNKTYPE
|
||||
link.linkname = str(victim) # absolute path outside dest
|
||||
tf.addfile(link)
|
||||
|
||||
payload = b"pwned"
|
||||
info = tarfile.TarInfo("victim.txt")
|
||||
info.size = len(payload)
|
||||
tf.addfile(info, io.BytesIO(payload))
|
||||
|
||||
with _tar_from_members(build) as tf:
|
||||
with pytest.raises(ValueError, match="escaping destination"):
|
||||
_safe_extractall(tf, dest)
|
||||
|
||||
assert victim.read_bytes() == b"safe"
|
||||
Reference in New Issue
Block a user