diff --git a/src/crewai_tools/__init__.py b/src/crewai_tools/__init__.py index f42750593..9a4af6d9f 100644 --- a/src/crewai_tools/__init__.py +++ b/src/crewai_tools/__init__.py @@ -16,6 +16,7 @@ from .tools import ( EXASearchTool, FileReadTool, FileWriterTool, + FileCompressorTool, FirecrawlCrawlWebsiteTool, FirecrawlScrapeWebsiteTool, FirecrawlSearchTool, diff --git a/src/crewai_tools/tools/__init__.py b/src/crewai_tools/tools/__init__.py index d95d08c78..7aba8d4ea 100644 --- a/src/crewai_tools/tools/__init__.py +++ b/src/crewai_tools/tools/__init__.py @@ -18,6 +18,7 @@ from .file_writer_tool.file_writer_tool import FileWriterTool from .firecrawl_crawl_website_tool.firecrawl_crawl_website_tool import ( FirecrawlCrawlWebsiteTool, ) +from .files_compressor_tool.files_compressor_tool import FileCompressorTool from .firecrawl_scrape_website_tool.firecrawl_scrape_website_tool import ( FirecrawlScrapeWebsiteTool, ) diff --git a/src/crewai_tools/tools/files_compressor_tool/README.md b/src/crewai_tools/tools/files_compressor_tool/README.md new file mode 100644 index 000000000..01fdeee7d --- /dev/null +++ b/src/crewai_tools/tools/files_compressor_tool/README.md @@ -0,0 +1,119 @@ +# 📦 FileCompressorTool + +The **FileCompressorTool** is a utility for compressing individual files or entire directories (including nested subdirectories) into different archive formats, such as `.zip` or `.tar` (including `.tar.gz`, `.tar.bz2`, and `.tar.xz`). This tool is useful for archiving logs, documents, datasets, or backups in a compact format, and ensures flexibility in how the archives are created. + +--- + +## Description + +This tool: +- Accepts a **file or directory** as input. +- Supports **recursive compression** of subdirectories. +- Lets you define a **custom output archive path** or defaults to the current directory. +- Handles **overwrite protection** to avoid unintentional data loss. +- Supports multiple compression formats: `.zip`, `.tar`, `.tar.gz`, `.tar.bz2`, and `.tar.xz`. 
+ +--- + +## Arguments + +| Argument | Type | Required | Description | +|---------------|-----------|----------|-----------------------------------------------------------------------------| +| `input_path` | `str` | ✅ | Path to the file or directory you want to compress. | +| `output_path` | `str` | ❌ | Optional path for the resulting archive file. Defaults to `./<input_name>.<format>` (the input's name, created in the current working directory). | +| `overwrite` | `bool` | ❌ | Whether to overwrite an existing archive file. Defaults to `False`. | +| `format` | `str` | ❌ | Compression format to use. Can be one of `zip`, `tar`, `tar.gz`, `tar.bz2`, `tar.xz`. Defaults to `zip`. | + +--- + + +## Usage Example + +```python +from crewai_tools import FileCompressorTool + +# Initialize the tool +tool = FileCompressorTool() + +# Compress a directory with subdirectories and files into a zip archive +result = tool._run( + input_path="./data/project_docs", # Folder containing subfolders & files + output_path="./output/project_docs.zip", # Optional output path (defaults to zip format) + overwrite=True # Allow overwriting if file exists +) +print(result) +# Example output: Successfully compressed './data/project_docs' into './output/project_docs.zip' + +``` + +--- + +## Example Scenarios + +### Compress a single file into a zip archive: +```python +# Compress a single file into a zip archive +result = tool._run(input_path="report.pdf") +# Example output: Successfully compressed 'report.pdf' into './report.zip' +``` + +### Compress a directory with nested folders into a zip archive: +```python +# Compress a directory containing nested subdirectories and files +result = tool._run(input_path="./my_data", overwrite=True) +# Example output: Successfully compressed 'my_data' into './my_data.zip' +``` + +### Use a custom output path with a zip archive: +```python +# Compress a directory and specify a custom zip output location +result = tool._run(input_path="./my_data", output_path="./backups/my_data_backup.zip", overwrite=True) +# Example output: Successfully 
compressed 'my_data' into './backups/my_data_backup.zip' +``` + +### Prevent overwriting an existing zip file: +```python +# Try to compress a directory without overwriting an existing zip file +result = tool._run(input_path="./my_data", output_path="./backups/my_data_backup.zip", overwrite=False) +# Example output: Output './backups/my_data_backup.zip' already exists and overwrite is set to False. +``` + +### Compress into a tar archive: +```python +# Compress a directory into a tar archive +result = tool._run(input_path="./my_data", format="tar", overwrite=True) +# Example output: Successfully compressed 'my_data' into './my_data.tar' +``` + +### Compress into a tar.gz archive: +```python +# Compress a directory into a tar.gz archive +result = tool._run(input_path="./my_data", format="tar.gz", overwrite=True) +# Example output: Successfully compressed 'my_data' into './my_data.tar.gz' +``` + +### Compress into a tar.bz2 archive: +```python +# Compress a directory into a tar.bz2 archive +result = tool._run(input_path="./my_data", format="tar.bz2", overwrite=True) +# Example output: Successfully compressed 'my_data' into './my_data.tar.bz2' +``` + +### Compress into a tar.xz archive: +```python +# Compress a directory into a tar.xz archive +result = tool._run(input_path="./my_data", format="tar.xz", overwrite=True) +# Example output: Successfully compressed 'my_data' into './my_data.tar.xz' +``` + +--- + +## Error Handling and Validations + +- **File Extension Validation**: The tool ensures that the output file extension matches the selected format (e.g., `.zip` for `zip` format, `.tar` for `tar` format, etc.). +- **File/Directory Existence**: If the input path does not exist, an error message will be returned. +- **Overwrite Protection**: If a file already exists at the output path, the tool checks the `overwrite` flag before proceeding. If `overwrite=False`, it prevents overwriting the existing file. 
+
+---
+
+This tool provides a flexible and robust way to handle file and directory compression across multiple formats for efficient storage and backups.
diff --git a/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool.py b/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool.py
new file mode 100644
index 000000000..c86fd64e0
--- /dev/null
+++ b/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool.py
@@ -0,0 +1,159 @@
+import os
+import zipfile
+import tarfile
+from typing import Type, Optional
+from pydantic import BaseModel, Field
+from crewai.tools import BaseTool
+
+# Supported archive formats, mapped to the extension that the output file
+# is required to carry.
+_FORMAT_EXTENSIONS = {
+    "zip": ".zip",
+    "tar": ".tar",
+    "tar.gz": ".tar.gz",
+    "tar.bz2": ".tar.bz2",
+    "tar.xz": ".tar.xz",
+}
+
+# tarfile.open() write mode for each tar-based format.
+_TAR_MODES = {
+    "tar": "w",
+    "tar.gz": "w:gz",
+    "tar.bz2": "w:bz2",
+    "tar.xz": "w:xz",
+}
+
+
+class FileCompressorToolInput(BaseModel):
+    """Input schema for FileCompressorTool."""
+    input_path: str = Field(..., description="Path to the file or directory to compress.")
+    output_path: Optional[str] = Field(default=None, description="Optional output archive filename.")
+    overwrite: bool = Field(default=False, description="Whether to overwrite the archive if it already exists.")
+    format: str = Field(default="zip", description="Compression format ('zip', 'tar', 'tar.gz', 'tar.bz2', 'tar.xz').")
+
+
+class FileCompressorTool(BaseTool):
+    """Tool that compresses a file or a directory into a zip or tar archive."""
+
+    name: str = "File Compressor Tool"
+    description: str = (
+        "Compresses a file or directory into an archive "
+        "(zip, tar, tar.gz, tar.bz2 or tar.xz). "
+        "Useful for archiving logs, documents, or backups."
+    )
+    args_schema: Type[BaseModel] = FileCompressorToolInput
+
+    def _run(self, input_path: str, output_path: Optional[str] = None, overwrite: bool = False, format: str = "zip") -> str:
+        """Compress ``input_path`` into an archive and report the outcome.
+
+        Args:
+            input_path: Existing file or directory to compress.
+            output_path: Destination archive. When omitted, the archive is
+                created in the current working directory and named after
+                the input.
+            overwrite: Whether an archive that already exists at the output
+                path may be replaced.
+            format: One of 'zip', 'tar', 'tar.gz', 'tar.bz2', 'tar.xz'.
+
+        Returns:
+            A message describing success or the reason for failure; errors
+            are reported in the message rather than raised.
+        """
+        if not os.path.exists(input_path):
+            return f"Input path '{input_path}' does not exist."
+
+        # Reject unknown formats before the format is used to build a path.
+        if format not in _FORMAT_EXTENSIONS:
+            return f"Compression format '{format}' is not supported. Allowed formats: {', '.join(_FORMAT_EXTENSIONS)}"
+
+        if not output_path:
+            output_path = self._generate_output_path(input_path, format)
+
+        extension = _FORMAT_EXTENSIONS[format]
+        if not output_path.endswith(extension):
+            return f"Error: If '{format}' format is chosen, output file must have a '{extension}' extension."
+
+        # Opening the archive for writing truncates it, so an output path
+        # that is the input itself would destroy the data being archived.
+        if os.path.abspath(input_path) == os.path.abspath(output_path):
+            return f"Error: Output path '{output_path}' must be different from the input path."
+
+        try:
+            # Inside the try block so that a failure to create the output
+            # directory is reported as a message like any other error.
+            if not self._prepare_output(output_path, overwrite):
+                return f"Output '{output_path}' already exists and overwrite is set to False."
+
+            if format == "zip":
+                self._compress_zip(input_path, output_path)
+            else:
+                self._compress_tar(input_path, output_path, format)
+            return f"Successfully compressed '{input_path}' into '{output_path}'"
+        except FileNotFoundError:
+            return f"Error: File not found at path: {input_path}"
+        except PermissionError:
+            return f"Error: Permission denied when accessing '{input_path}' or writing '{output_path}'"
+        except Exception as e:
+            return f"An unexpected error occurred during compression: {str(e)}"
+
+    def _generate_output_path(self, input_path: str, format: str) -> str:
+        """Generates output path based on input path and format."""
+        if os.path.isfile(input_path):
+            base_name = os.path.splitext(os.path.basename(input_path))[0]  # Remove extension
+        else:
+            base_name = os.path.basename(os.path.normpath(input_path))  # Directory name
+        return os.path.join(os.getcwd(), f"{base_name}.{format}")
+
+    def _prepare_output(self, output_path: str, overwrite: bool) -> bool:
+        """Ensures output path is ready for writing.
+
+        Creates the parent directory of ``output_path`` when it is missing.
+        Returns False when ``output_path`` already exists and ``overwrite``
+        is not set, True otherwise.
+        """
+        output_dir = os.path.dirname(output_path)
+        if output_dir and not os.path.exists(output_dir):
+            # exist_ok: tolerate the directory being created concurrently.
+            os.makedirs(output_dir, exist_ok=True)
+        if os.path.exists(output_path) and not overwrite:
+            return False
+        return True
+
+    def _compress_zip(self, input_path: str, output_path: str) -> None:
+        """Compresses input into a zip archive.
+
+        A file is stored under its base name. The content of a directory is
+        stored relative to that directory, empty sub-directories included.
+        """
+        archive_path = os.path.abspath(output_path)
+        with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
+            if os.path.isfile(input_path):
+                zipf.write(input_path, os.path.basename(input_path))
+                return
+            for root, dirs, files in os.walk(input_path):
+                if not dirs and not files and root != input_path:
+                    # The file loop below never visits an empty directory.
+                    zipf.write(root, os.path.relpath(root, start=input_path))
+                for file in files:
+                    full_path = os.path.join(root, file)
+                    if os.path.abspath(full_path) == archive_path:
+                        # The archive lies inside the input directory: do
+                        # not add the half-written archive to itself.
+                        continue
+                    arcname = os.path.relpath(full_path, start=input_path)
+                    zipf.write(full_path, arcname)
+
+    def _compress_tar(self, input_path: str, output_path: str, format: str) -> None:
+        """Compresses input into a tar archive with the given format.
+
+        Raises:
+            ValueError: If ``format`` is not a supported tar format.
+        """
+        if format not in _TAR_MODES:
+            raise ValueError(f"Unsupported tar format: {format}")
+
+        # normpath drops a trailing separator: basename() of "data/" is "",
+        # which would not store the members under the directory's name.
+        arcname = os.path.basename(os.path.normpath(input_path))
+        with tarfile.open(output_path, _TAR_MODES[format]) as tarf:
+            tarf.add(input_path, arcname=arcname)
diff --git a/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool_test2.py b/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool_test2.py
new file mode 100644
index 000000000..b30199842
--- /dev/null
+++ b/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool_test2.py
@@ -0,0 +1,93 @@
+
+import os
+import pytest
+from crewai_tools.tools.files_compressor_tool.files_compressor_tool import FileCompressorTool
+from unittest.mock import patch, MagicMock
+
+@pytest.fixture
+def tool():
+    return FileCompressorTool()
+
+@patch("os.path.exists", return_value=False)
+def test_input_path_does_not_exist(mock_exists, tool):
+    result = tool._run("nonexistent_path")
+    assert "does not exist" in result
+
+@patch("os.path.exists", return_value=True)
+@patch("os.getcwd", return_value="/mocked/cwd")
+@patch.object(FileCompressorTool, "_compress_zip")  # Mock actual compression
+@patch.object(FileCompressorTool, "_prepare_output", return_value=True)
+def test_generate_output_path_default(mock_prepare, mock_compress, mock_cwd, mock_exists, tool):
+    result = tool._run(input_path="mydir", format="zip")
+    assert "Successfully compressed" in result
+    mock_compress.assert_called_once()
+
+@patch("os.path.exists", return_value=True)
+@patch.object(FileCompressorTool, "_compress_zip")
+@patch.object(FileCompressorTool, "_prepare_output", return_value=True)
+def test_zip_compression(mock_prepare, mock_compress, mock_exists, tool):
+    result = tool._run(input_path="some/path", output_path="archive.zip", format="zip", overwrite=True)
+    assert "Successfully compressed" in result
+    mock_compress.assert_called_once()
+
+@patch("os.path.exists", return_value=True)
+@patch.object(FileCompressorTool, "_compress_tar")
+@patch.object(FileCompressorTool, "_prepare_output", return_value=True)
+def test_tar_gz_compression(mock_prepare, mock_compress, mock_exists, tool):
+    result = tool._run(input_path="some/path", output_path="archive.tar.gz", format="tar.gz", overwrite=True)
+    assert "Successfully compressed" in result
+    mock_compress.assert_called_once()
+
+@pytest.mark.parametrize("fmt", ["tar", "tar.bz2", "tar.xz"])
+@patch("os.path.exists", return_value=True)
+@patch.object(FileCompressorTool, "_compress_tar")
+@patch.object(FileCompressorTool, "_prepare_output", return_value=True)
+def test_other_tar_formats(mock_prepare, mock_compress, mock_exists, fmt, tool):
+    result = tool._run(input_path="path/to/input", output_path=f"archive.{fmt}", format=fmt, overwrite=True)
+    assert "Successfully compressed" in result
+    mock_compress.assert_called_once()
+
+@pytest.mark.parametrize("fmt", ["rar", "7z"])
+@patch("os.path.exists", return_value=True)  # Ensure input_path exists
+def test_unsupported_format(_, tool, fmt):
+    result = tool._run(input_path="some/path", output_path=f"archive.{fmt}", format=fmt)
+    assert "not supported" in result
+
+@patch("os.path.exists", return_value=True)
+def test_extension_mismatch(_, tool):
+    result = tool._run(input_path="some/path", output_path="archive.zip", format="tar.gz")
+    assert "must have a '.tar.gz' extension" in result
+
+# os.path.exists is patched to True, so both the input and the output archive
+# appear to exist; with overwrite=False the tool must refuse to write.
+@patch("os.path.exists", return_value=True)
+def test_existing_output_no_overwrite(_, tool):
+    result = tool._run(input_path="some/path", output_path="archive.zip", format="zip", overwrite=False)
+    assert "overwrite is set to False" in result
+
+@patch("os.path.exists", return_value=True)
+@patch("zipfile.ZipFile", side_effect=PermissionError)
+def test_permission_error(mock_zip, _, tool):
+    result = tool._run(input_path="file.txt", output_path="file.zip", format="zip", overwrite=True)
+    assert "Permission denied" in result
+
+@patch("os.path.exists", return_value=True)
+@patch("zipfile.ZipFile", side_effect=FileNotFoundError)
+def test_file_not_found_during_zip(mock_zip, _, tool):
+    result = tool._run(input_path="file.txt", output_path="file.zip", format="zip", overwrite=True)
+    assert "File not found" in result
+
+@patch("os.path.exists", return_value=True)
+@patch("zipfile.ZipFile", side_effect=Exception("Unexpected"))
+def test_general_exception_during_zip(mock_zip, _, tool):
+    result = tool._run(input_path="file.txt", output_path="file.zip", format="zip", overwrite=True)
+    assert "unexpected error" in result
+
+# Test: Output directory is created when missing
+@patch("os.makedirs")
+@patch("os.path.exists", return_value=False)
+def test_prepare_output_makes_dir(mock_exists, mock_makedirs):
+    tool = FileCompressorTool()
+    result = tool._prepare_output("some/missing/path/file.zip", overwrite=True)
+    assert result is True
+    mock_makedirs.assert_called_once()