mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-10 00:28:31 +00:00
FileCompressorTool with support for files and subdirectories (#282)
* FileCompressorTool with support for files and subdirectories * README.md * Updated files_compressor_tool.py * Enhanced FileCompressorTool different compression formats * Update README.md * Updated with lookup tables * Updated files_compressor_tool.py * Added Test Cases * Removing Test_Cases.md inorder to update with correct test case as per the review * Added Test Cases * Test Cases with patch,MagicMock * Empty lines Removed * Updated Test Case,Ensured Maximum Scenarios * Deleting old one * Updated __init__.py to include FileCompressorTool * Update __init__.py to add FileCompressorTool
This commit is contained in:
@@ -16,6 +16,7 @@ from .tools import (
|
||||
EXASearchTool,
|
||||
FileReadTool,
|
||||
FileWriterTool,
|
||||
FileCompressorTool,
|
||||
FirecrawlCrawlWebsiteTool,
|
||||
FirecrawlScrapeWebsiteTool,
|
||||
FirecrawlSearchTool,
|
||||
|
||||
@@ -18,6 +18,7 @@ from .file_writer_tool.file_writer_tool import FileWriterTool
|
||||
from .firecrawl_crawl_website_tool.firecrawl_crawl_website_tool import (
|
||||
FirecrawlCrawlWebsiteTool,
|
||||
)
|
||||
from .files_compressor_tool.files_compressor_tool import FileCompressorTool
|
||||
from .firecrawl_scrape_website_tool.firecrawl_scrape_website_tool import (
|
||||
FirecrawlScrapeWebsiteTool,
|
||||
)
|
||||
|
||||
119
src/crewai_tools/tools/files_compressor_tool/README.md
Normal file
119
src/crewai_tools/tools/files_compressor_tool/README.md
Normal file
@@ -0,0 +1,119 @@
|
||||
# 📦 FileCompressorTool
|
||||
|
||||
The **FileCompressorTool** is a utility for compressing individual files or entire directories (including nested subdirectories) into different archive formats, such as `.zip` or `.tar` (including `.tar.gz`, `.tar.bz2`, and `.tar.xz`). This tool is useful for archiving logs, documents, datasets, or backups in a compact format, and ensures flexibility in how the archives are created.
|
||||
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
This tool:
|
||||
- Accepts a **file or directory** as input.
|
||||
- Supports **recursive compression** of subdirectories.
|
||||
- Lets you define a **custom output archive path**; when omitted, the archive is written to the current working directory as `<name>.<format>`.
|
||||
- Handles **overwrite protection** to avoid unintentional data loss.
|
||||
- Supports multiple compression formats: `.zip`, `.tar`, `.tar.gz`, `.tar.bz2`, and `.tar.xz`.
|
||||
|
||||
---
|
||||
|
||||
## Arguments
|
||||
|
||||
| Argument | Type | Required | Description |
|
||||
|---------------|-----------|----------|-----------------------------------------------------------------------------|
|
||||
| `input_path` | `str` | ✅ | Path to the file or directory you want to compress. |
|
||||
| `output_path` | `str` | ❌ | Optional path for the resulting archive file. Defaults to `./<name>.<format>`. |
|
||||
| `overwrite` | `bool` | ❌ | Whether to overwrite an existing archive file. Defaults to `False`. |
|
||||
| `format` | `str` | ❌ | Compression format to use. Can be one of `zip`, `tar`, `tar.gz`, `tar.bz2`, `tar.xz`. Defaults to `zip`. |
|
||||
|
||||
---
|
||||
|
||||
|
||||
## Usage Example
|
||||
|
||||
```python
|
||||
from crewai_tools import FileCompressorTool
|
||||
|
||||
# Initialize the tool
|
||||
tool = FileCompressorTool()
|
||||
|
||||
# Compress a directory with subdirectories and files into a zip archive
|
||||
result = tool._run(
|
||||
input_path="./data/project_docs", # Folder containing subfolders & files
|
||||
output_path="./output/project_docs.zip", # Optional output path; its extension must match the chosen format (zip by default)
|
||||
overwrite=True # Allow overwriting if file exists
|
||||
)
|
||||
print(result)
|
||||
# Example output: Successfully compressed './data/project_docs' into './output/project_docs.zip'
|
||||
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Example Scenarios
|
||||
|
||||
### Compress a single file into a zip archive:
|
||||
```python
|
||||
# Compress a single file into a zip archive
|
||||
result = tool._run(input_path="report.pdf")
|
||||
# Example output: Successfully compressed 'report.pdf' into '<current working directory>/report.zip'
|
||||
```
|
||||
|
||||
### Compress a directory with nested folders into a zip archive:
|
||||
```python
|
||||
# Compress a directory containing nested subdirectories and files
|
||||
result = tool._run(input_path="./my_data", overwrite=True)
|
||||
# Example output: Successfully compressed './my_data' into '<current working directory>/my_data.zip'
|
||||
```
|
||||
|
||||
### Use a custom output path with a zip archive:
|
||||
```python
|
||||
# Compress a directory and specify a custom zip output location
|
||||
result = tool._run(input_path="./my_data", output_path="./backups/my_data_backup.zip", overwrite=True)
|
||||
# Example output: Successfully compressed './my_data' into './backups/my_data_backup.zip'
|
||||
```
|
||||
|
||||
### Prevent overwriting an existing zip file:
|
||||
```python
|
||||
# Try to compress a directory without overwriting an existing zip file
|
||||
result = tool._run(input_path="./my_data", output_path="./backups/my_data_backup.zip", overwrite=False)
|
||||
# Example output: Output './backups/my_data_backup.zip' already exists and overwrite is set to False.
|
||||
```
|
||||
|
||||
### Compress into a tar archive:
|
||||
```python
|
||||
# Compress a directory into a tar archive
|
||||
result = tool._run(input_path="./my_data", format="tar", overwrite=True)
|
||||
# Example output: Successfully compressed './my_data' into '<current working directory>/my_data.tar'
|
||||
```
|
||||
|
||||
### Compress into a tar.gz archive:
|
||||
```python
|
||||
# Compress a directory into a tar.gz archive
|
||||
result = tool._run(input_path="./my_data", format="tar.gz", overwrite=True)
|
||||
# Example output: Successfully compressed './my_data' into '<current working directory>/my_data.tar.gz'
|
||||
```
|
||||
|
||||
### Compress into a tar.bz2 archive:
|
||||
```python
|
||||
# Compress a directory into a tar.bz2 archive
|
||||
result = tool._run(input_path="./my_data", format="tar.bz2", overwrite=True)
|
||||
# Example output: Successfully compressed './my_data' into '<current working directory>/my_data.tar.bz2'
|
||||
```
|
||||
|
||||
### Compress into a tar.xz archive:
|
||||
```python
|
||||
# Compress a directory into a tar.xz archive
|
||||
result = tool._run(input_path="./my_data", format="tar.xz", overwrite=True)
|
||||
# Example output: Successfully compressed './my_data' into '<current working directory>/my_data.tar.xz'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Error Handling and Validations
|
||||
|
||||
- **File Extension Validation**: The tool ensures that the output file extension matches the selected format (e.g., `.zip` for `zip` format, `.tar` for `tar` format, etc.).
|
||||
- **File/Directory Existence**: If the input path does not exist, an error message will be returned.
|
||||
- **Overwrite Protection**: If a file already exists at the output path, the tool checks the `overwrite` flag before proceeding. If `overwrite=False`, it prevents overwriting the existing file.
|
||||
|
||||
---
|
||||
|
||||
This tool provides a flexible and robust way to handle file and directory compression across multiple formats for efficient storage and backups.
|
||||
@@ -0,0 +1,117 @@
|
||||
import os
|
||||
import zipfile
|
||||
import tarfile
|
||||
from typing import Type, Optional
|
||||
from pydantic import BaseModel, Field
|
||||
from crewai.tools import BaseTool
|
||||
|
||||
|
||||
class FileCompressorToolInput(BaseModel):
    """Argument schema accepted by FileCompressorTool."""

    # Source file or directory; the only required argument.
    input_path: str = Field(
        ...,
        description="Path to the file or directory to compress.",
    )
    # Destination archive; left as None when the caller does not supply one.
    output_path: Optional[str] = Field(
        default=None,
        description="Optional output archive filename.",
    )
    # Replacing an existing archive is opt-in.
    overwrite: bool = Field(
        default=False,
        description="Whether to overwrite the archive if it already exists.",
    )
    # Archive format name; zip unless stated otherwise.
    format: str = Field(
        default="zip",
        description="Compression format ('zip', 'tar', 'tar.gz', 'tar.bz2', 'tar.xz').",
    )
|
||||
|
||||
|
||||
class FileCompressorTool(BaseTool):
    """Tool that archives a single file or a whole directory tree.

    Supported formats are ``zip``, ``tar``, ``tar.gz``, ``tar.bz2`` and
    ``tar.xz``. ``_run`` reports every outcome, including validation
    failures and I/O errors, through the string it returns.
    """

    name: str = "File Compressor Tool"
    description: str = (
        "Compresses a file or directory into an archive "
        "(supported formats: zip, tar, tar.gz, tar.bz2, tar.xz). "
        "Useful for archiving logs, documents, or backups."
    )
    args_schema: Type[BaseModel] = FileCompressorToolInput

    def _run(
        self,
        input_path: str,
        output_path: Optional[str] = None,
        overwrite: bool = False,
        format: str = "zip",
    ) -> str:
        """Compress ``input_path`` and return a human-readable status message.

        Args:
            input_path: Existing file or directory to compress.
            output_path: Destination archive. When empty, a path in the
                current working directory is derived from ``input_path``.
            overwrite: Whether an existing archive may be replaced.
            format: One of ``zip``, ``tar``, ``tar.gz``, ``tar.bz2``, ``tar.xz``.

        Returns:
            A success message, or a message describing why nothing was written.
        """
        if not os.path.exists(input_path):
            return f"Input path '{input_path}' does not exist."

        format_extension = {
            "zip": ".zip",
            "tar": ".tar",
            "tar.gz": ".tar.gz",
            "tar.bz2": ".tar.bz2",
            "tar.xz": ".tar.xz",
        }
        if format not in format_extension:
            return (
                f"Compression format '{format}' is not supported. "
                f"Allowed formats: {', '.join(format_extension)}"
            )

        if not output_path:
            output_path = self._generate_output_path(input_path, format)

        # Compare case-insensitively so "BACKUP.ZIP" is accepted for zip.
        if not output_path.lower().endswith(format_extension[format]):
            return (
                f"Error: If '{format}' format is chosen, output file must have "
                f"a '{format_extension[format]}' extension."
            )

        # Opening the archive for writing truncates it; refuse to clobber the
        # very file that is about to be read.
        if os.path.abspath(input_path) == os.path.abspath(output_path):
            return (
                f"Error: Output path '{output_path}' is the same as the input "
                "path; choose a different output path."
            )

        try:
            # Kept inside the try so directory-creation failures are reported
            # as a message like every other I/O error.
            if not self._prepare_output(output_path, overwrite):
                return f"Output '{output_path}' already exists and overwrite is set to False."

            if format == "zip":
                self._compress_zip(input_path, output_path)
            else:
                self._compress_tar(input_path, output_path, format)

            return f"Successfully compressed '{input_path}' into '{output_path}'"
        except FileNotFoundError:
            return f"Error: File not found at path: {input_path}"
        except PermissionError:
            return f"Error: Permission denied when accessing '{input_path}' or writing '{output_path}'"
        except Exception as e:
            return f"An unexpected error occurred during compression: {str(e)}"

    def _generate_output_path(self, input_path: str, format: str) -> str:
        """Build ``<cwd>/<name>.<format>`` from the input's base name.

        Files lose their extension; directories contribute their own name.
        """
        if os.path.isfile(input_path):
            base_name = os.path.splitext(os.path.basename(input_path))[0]
        else:
            # abspath resolves "." and trailing separators to a real name.
            base_name = os.path.basename(os.path.abspath(input_path))
        # A filesystem root has no base name; fall back to a generic one.
        return os.path.join(os.getcwd(), f"{base_name or 'archive'}.{format}")

    def _prepare_output(self, output_path: str, overwrite: bool) -> bool:
        """Create the output directory if needed and apply the overwrite rule.

        Returns:
            ``False`` when ``output_path`` exists and ``overwrite`` is false,
            ``True`` otherwise.
        """
        output_dir = os.path.dirname(output_path)
        if output_dir and not os.path.exists(output_dir):
            # exist_ok covers the directory appearing between check and call.
            os.makedirs(output_dir, exist_ok=True)
        if os.path.exists(output_path) and not overwrite:
            return False
        return True

    def _compress_zip(self, input_path: str, output_path: str):
        """Write ``input_path`` into a deflate-compressed zip archive.

        A file is stored under its base name. A directory's files are stored
        under paths relative to ``input_path``.
        """
        archive_abspath = os.path.abspath(output_path)
        with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            if os.path.isfile(input_path):
                zipf.write(input_path, os.path.basename(input_path))
                return

            for root, _, files in os.walk(input_path):
                for file_name in files:
                    full_path = os.path.join(root, file_name)
                    # The archive may live inside the directory being walked;
                    # never add the half-written archive to itself.
                    if os.path.abspath(full_path) == archive_abspath:
                        continue
                    arcname = os.path.relpath(full_path, start=input_path)
                    zipf.write(full_path, arcname)

    def _compress_tar(self, input_path: str, output_path: str, format: str):
        """Write ``input_path`` into a tar archive using ``format``'s mode.

        The input is added under its base name.

        Raises:
            ValueError: If ``format`` is not a supported tar variant.
        """
        format_mode = {
            "tar": "w",
            "tar.gz": "w:gz",
            "tar.bz2": "w:bz2",
            "tar.xz": "w:xz",
        }
        if format not in format_mode:
            raise ValueError(f"Unsupported tar format: {format}")

        # basename() of "dir/" is empty; resolve to an absolute path first so
        # the archive always gets a named top-level entry.
        arcname = os.path.basename(os.path.abspath(input_path)) or "archive"
        with tarfile.open(output_path, format_mode[format]) as tarf:
            tarf.add(input_path, arcname=arcname)
|
||||
@@ -0,0 +1,93 @@
|
||||
|
||||
import os
|
||||
import pytest
|
||||
from crewai_tools.tools.files_compressor_tool import FileCompressorTool
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
@pytest.fixture
def tool():
    """Provide a fresh FileCompressorTool for each test."""
    compressor = FileCompressorTool()
    return compressor
|
||||
|
||||
@patch("os.path.exists", return_value=False)
def test_input_path_does_not_exist(exists_mock, tool):
    """A missing input path yields a 'does not exist' message."""
    message = tool._run("nonexistent_path")

    assert "does not exist" in message
|
||||
|
||||
@patch("os.path.exists", return_value=True)
@patch("os.getcwd", return_value="/mocked/cwd")
@patch.object(FileCompressorTool, "_compress_zip")  # Mock actual compression
@patch.object(FileCompressorTool, "_prepare_output", return_value=True)
def test_generate_output_path_default(mock_prepare, mock_compress, mock_cwd, mock_exists, tool):
    """Without ``output_path`` the archive is placed in the working directory."""
    expected_output = os.path.join("/mocked/cwd", "mydir.zip")

    result = tool._run(input_path="mydir", format="zip")

    assert "Successfully compressed" in result
    # The point of this test: the derived default path must be reported.
    assert expected_output in result
    mock_compress.assert_called_once()
|
||||
|
||||
@patch("os.path.exists", return_value=True)
@patch.object(FileCompressorTool, "_compress_zip")
@patch.object(FileCompressorTool, "_prepare_output", return_value=True)
def test_zip_compression(prepare_mock, zip_mock, exists_mock, tool):
    """The zip format is routed to ``_compress_zip`` exactly once."""
    message = tool._run(
        input_path="some/path",
        output_path="archive.zip",
        format="zip",
        overwrite=True,
    )

    assert "Successfully compressed" in message
    zip_mock.assert_called_once()
|
||||
|
||||
@patch("os.path.exists", return_value=True)
@patch.object(FileCompressorTool, "_compress_tar")
@patch.object(FileCompressorTool, "_prepare_output", return_value=True)
def test_tar_gz_compression(prepare_mock, tar_mock, exists_mock, tool):
    """The tar.gz format is routed to ``_compress_tar`` exactly once."""
    message = tool._run(
        input_path="some/path",
        output_path="archive.tar.gz",
        format="tar.gz",
        overwrite=True,
    )

    assert "Successfully compressed" in message
    tar_mock.assert_called_once()
|
||||
|
||||
@pytest.mark.parametrize("format", ["tar", "tar.bz2", "tar.xz"])
@patch("os.path.exists", return_value=True)
@patch.object(FileCompressorTool, "_compress_tar")
@patch.object(FileCompressorTool, "_prepare_output", return_value=True)
def test_other_tar_formats(prepare_mock, tar_mock, exists_mock, format, tool):
    """Each remaining tar variant is routed to ``_compress_tar`` once."""
    archive_name = f"archive.{format}"

    message = tool._run(
        input_path="path/to/input",
        output_path=archive_name,
        format=format,
        overwrite=True,
    )

    assert "Successfully compressed" in message
    tar_mock.assert_called_once()
|
||||
|
||||
@pytest.mark.parametrize("format", ["rar", "7z"])
@patch("os.path.exists", return_value=True)  # Ensure input_path exists
def test_unsupported_format(_, tool, format):
    """Formats outside the supported set produce a 'not supported' message."""
    archive_name = f"archive.{format}"

    message = tool._run(input_path="some/path", output_path=archive_name, format=format)

    assert "not supported" in message
|
||||
|
||||
@patch("os.path.exists", return_value=True)
def test_extension_mismatch(_, tool):
    """An output extension that disagrees with the format is reported."""
    message = tool._run(
        input_path="some/path",
        output_path="archive.zip",
        format="tar.gz",
    )

    assert "must have a '.tar.gz' extension" in message
|
||||
|
||||
@patch("os.path.isfile", return_value=True)
@patch("os.path.exists", return_value=True)  # Input and existing archive both "exist"
def test_existing_output_no_overwrite(mock_exists, mock_isfile, tool):
    """An existing archive is left alone when ``overwrite`` is False."""
    result = tool._run(input_path="some/path", output_path="archive.zip", format="zip", overwrite=False)

    assert "overwrite is set to False" in result
|
||||
|
||||
@patch("os.path.exists", return_value=True)
@patch("zipfile.ZipFile", side_effect=PermissionError)
def test_permission_error(zipfile_mock, _, tool):
    """A PermissionError from ZipFile becomes a 'Permission denied' message."""
    message = tool._run(
        input_path="file.txt",
        output_path="file.zip",
        format="zip",
        overwrite=True,
    )

    assert "Permission denied" in message
|
||||
|
||||
@patch("os.path.exists", return_value=True)
@patch("zipfile.ZipFile", side_effect=FileNotFoundError)
def test_file_not_found_during_zip(zipfile_mock, _, tool):
    """A FileNotFoundError from ZipFile becomes a 'File not found' message."""
    message = tool._run(
        input_path="file.txt",
        output_path="file.zip",
        format="zip",
        overwrite=True,
    )

    assert "File not found" in message
|
||||
|
||||
@patch("os.path.exists", return_value=True)
@patch("zipfile.ZipFile", side_effect=Exception("Unexpected"))
def test_general_exception_during_zip(zipfile_mock, _, tool):
    """Any other exception from ZipFile is reported as an unexpected error."""
    message = tool._run(
        input_path="file.txt",
        output_path="file.zip",
        format="zip",
        overwrite=True,
    )

    assert "unexpected error" in message
|
||||
|
||||
# Test: Output directory is created when missing
@patch("os.makedirs")
@patch("os.path.exists", return_value=False)
def test_prepare_output_makes_dir(exists_mock, makedirs_mock):
    """``_prepare_output`` creates the parent directory when it is absent."""
    compressor = FileCompressorTool()

    ready = compressor._prepare_output("some/missing/path/file.zip", overwrite=True)

    assert ready is True
    makedirs_mock.assert_called_once()
|
||||
Reference in New Issue
Block a user