mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-07-01 13:18:10 +00:00
fix: stop FileCompressorTool from archiving out-of-tree symlink targets
input_path/output_path were already allow-list validated, but when compressing a directory the per-file members were not. zipfile.write() dereferences symlinks, so a symlink inside an allowed directory pointing at an out-of-tree secret (e.g. ~/.ssh/id_rsa, /etc/passwd) had its contents copied into the archive — a confinement bypass / info-disclosure. - _compress_zip: validate each walked file against the allow-list; skip and record any whose resolved path escapes it. - _compress_tar: filter drops symlink/hardlink members (tar stores links rather than dereferencing, so this prevents shipping an out-of-tree link that resolves at extraction time). - _run: surface a "N item(s) skipped for safety" note rather than swallowing the exclusions. - tests: regression tests asserting the secret content never lands in the zip or tar archive. Also fix a pre-existing broken import that made the whole compressor test module error at collection (empty package __init__; aligned to the module-path import used by sibling tools). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -77,11 +77,19 @@ class FileCompressorTool(BaseTool):
|
||||
"tar.xz": self._compress_tar,
|
||||
}
|
||||
if format == "zip":
|
||||
format_compression[format](input_path, output_path) # type: ignore[operator]
|
||||
skipped = format_compression[format](input_path, output_path) # type: ignore[operator]
|
||||
else:
|
||||
format_compression[format](input_path, output_path, format) # type: ignore[operator]
|
||||
skipped = format_compression[format]( # type: ignore[operator]
|
||||
input_path, output_path, format
|
||||
)
|
||||
|
||||
return f"Successfully compressed '{input_path}' into '{output_path}'"
|
||||
message = f"Successfully compressed '{input_path}' into '{output_path}'"
|
||||
if isinstance(skipped, list) and skipped:
|
||||
message += (
|
||||
f" ({len(skipped)} item(s) skipped for safety: resolved "
|
||||
f"outside the allowed directory)"
|
||||
)
|
||||
return message
|
||||
except FileNotFoundError:
|
||||
return f"Error: File not found at path: {input_path}"
|
||||
except PermissionError:
|
||||
@@ -109,8 +117,15 @@ class FileCompressorTool(BaseTool):
|
||||
return True
|
||||
|
||||
@staticmethod
|
||||
def _compress_zip(input_path: str, output_path: str) -> None:
|
||||
"""Compresses input into a zip archive."""
|
||||
def _compress_zip(input_path: str, output_path: str) -> list[str]:
|
||||
"""Compresses input into a zip archive.
|
||||
|
||||
Returns the files skipped because they resolved outside the allow-list.
|
||||
``zipfile.write`` dereferences symlinks, so each walked file is
|
||||
validated to stop a symlink inside the tree from copying the contents
|
||||
of an out-of-tree target (e.g. ``~/.ssh/id_rsa``) into the archive.
|
||||
"""
|
||||
skipped: list[str] = []
|
||||
with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as zipf:
|
||||
if os.path.isfile(input_path):
|
||||
zipf.write(input_path, os.path.basename(input_path))
|
||||
@@ -118,12 +133,25 @@ class FileCompressorTool(BaseTool):
|
||||
for root, _, files in os.walk(input_path):
|
||||
for file in files:
|
||||
full_path = os.path.join(root, file)
|
||||
try:
|
||||
validate_file_path(full_path)
|
||||
except ValueError:
|
||||
skipped.append(full_path)
|
||||
continue
|
||||
arcname = os.path.relpath(full_path, start=input_path)
|
||||
zipf.write(full_path, arcname)
|
||||
return skipped
|
||||
|
||||
@staticmethod
|
||||
def _compress_tar(input_path: str, output_path: str, format: str) -> None:
|
||||
"""Compresses input into a tar archive with the given format."""
|
||||
def _compress_tar(input_path: str, output_path: str, format: str) -> list[str]:
|
||||
"""Compresses input into a tar archive with the given format.
|
||||
|
||||
Returns the members skipped for safety. ``tarfile`` stores symlinks and
|
||||
hardlinks as link entries rather than dereferencing them, so the
|
||||
compress-time content leak that affects zip does not apply here. The
|
||||
filter drops link members so an out-of-tree symlink target cannot be
|
||||
shipped inside the archive and resolved at extraction time.
|
||||
"""
|
||||
format_mode = {
|
||||
"tar": "w",
|
||||
"tar.gz": "w:gz",
|
||||
@@ -135,7 +163,15 @@ class FileCompressorTool(BaseTool):
|
||||
raise ValueError(f"Unsupported tar format: {format}")
|
||||
|
||||
mode = format_mode[format]
|
||||
skipped: list[str] = []
|
||||
|
||||
def _drop_links(tarinfo: tarfile.TarInfo) -> tarfile.TarInfo | None:
|
||||
if tarinfo.issym() or tarinfo.islnk():
|
||||
skipped.append(tarinfo.name)
|
||||
return None
|
||||
return tarinfo
|
||||
|
||||
with tarfile.open(output_path, mode) as tarf: # type: ignore[call-overload]
|
||||
arcname = os.path.basename(input_path)
|
||||
tarf.add(input_path, arcname=arcname)
|
||||
tarf.add(input_path, arcname=arcname, filter=_drop_links)
|
||||
return skipped
|
||||
|
||||
@@ -1,6 +1,13 @@
|
||||
import os
|
||||
import shutil
|
||||
import tarfile
|
||||
import tempfile
|
||||
import zipfile
|
||||
from unittest.mock import patch
|
||||
|
||||
from crewai_tools.tools.files_compressor_tool import FileCompressorTool
|
||||
from crewai_tools.tools.files_compressor_tool.files_compressor_tool import (
|
||||
FileCompressorTool,
|
||||
)
|
||||
import pytest
|
||||
|
||||
|
||||
@@ -129,3 +136,67 @@ def test_prepare_output_makes_dir(mock_exists, mock_makedirs):
|
||||
result = tool._prepare_output("some/missing/path/file.zip", overwrite=True)
|
||||
assert result is True
|
||||
mock_makedirs.assert_called_once()
|
||||
|
||||
|
||||
# --- Security: symlink content must not leak out of the allow-list ---
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def symlink_env():
|
||||
"""A working dir (allow-listed) containing a normal file and a symlink that
|
||||
points to a secret file OUTSIDE the allow-list."""
|
||||
work_dir = tempfile.mkdtemp()
|
||||
secret_dir = tempfile.mkdtemp() # deliberately NOT allow-listed
|
||||
secret_file = os.path.join(secret_dir, "secret.txt")
|
||||
with open(secret_file, "w") as f:
|
||||
f.write("TOP_SECRET_PRIVATE_KEY")
|
||||
|
||||
src = os.path.join(work_dir, "src")
|
||||
os.makedirs(src)
|
||||
with open(os.path.join(src, "normal.txt"), "w") as f:
|
||||
f.write("safe content")
|
||||
os.symlink(secret_file, os.path.join(src, "leak.txt"))
|
||||
|
||||
prev = os.environ.get("CREWAI_TOOLS_ALLOWED_DIRS")
|
||||
os.environ["CREWAI_TOOLS_ALLOWED_DIRS"] = work_dir
|
||||
yield {"work_dir": work_dir, "src": src, "secret_dir": secret_dir}
|
||||
if prev is None:
|
||||
os.environ.pop("CREWAI_TOOLS_ALLOWED_DIRS", None)
|
||||
else:
|
||||
os.environ["CREWAI_TOOLS_ALLOWED_DIRS"] = prev
|
||||
shutil.rmtree(work_dir, ignore_errors=True)
|
||||
shutil.rmtree(secret_dir, ignore_errors=True)
|
||||
|
||||
|
||||
def test_zip_excludes_symlink_to_outside_file(tool, symlink_env):
|
||||
out = os.path.join(symlink_env["work_dir"], "archive.zip")
|
||||
result = tool._run(
|
||||
input_path=symlink_env["src"], output_path=out, format="zip", overwrite=True
|
||||
)
|
||||
assert "Successfully compressed" in result
|
||||
assert "skipped for safety" in result
|
||||
|
||||
with zipfile.ZipFile(out) as zf:
|
||||
names = zf.namelist()
|
||||
assert "normal.txt" in names
|
||||
assert "leak.txt" not in names
|
||||
blob = b"".join(zf.read(n) for n in names)
|
||||
assert b"TOP_SECRET_PRIVATE_KEY" not in blob
|
||||
|
||||
|
||||
def test_tar_excludes_symlink_to_outside_file(tool, symlink_env):
|
||||
out = os.path.join(symlink_env["work_dir"], "archive.tar.gz")
|
||||
result = tool._run(
|
||||
input_path=symlink_env["src"],
|
||||
output_path=out,
|
||||
format="tar.gz",
|
||||
overwrite=True,
|
||||
)
|
||||
assert "Successfully compressed" in result
|
||||
assert "skipped for safety" in result
|
||||
|
||||
with tarfile.open(out) as tf:
|
||||
members = tf.getnames()
|
||||
assert any(m.endswith("normal.txt") for m in members)
|
||||
assert not any(m.endswith("leak.txt") for m in members)
|
||||
assert all(not (tf.getmember(m).issym() or tf.getmember(m).islnk()) for m in members)
|
||||
|
||||
Reference in New Issue
Block a user