Add chunk reading functionality to FileReadTool (#266)

* Add chunk reading functionality to FileReadTool

- Added start_line parameter to specify which line to start reading from
- Added line_count parameter to specify how many lines to read
- Updated documentation with new parameters and examples

* [FIX] Bugs and Discussions

Fixed: start_line negative value
Improved: File Reading Operations

* [IMPROVE] Simplify line selection

* [REFACTOR] use mock_open while preserving essential filesystem tests
This commit is contained in:
Milad Noroozi
2025-04-22 17:48:29 +03:30
committed by GitHub
parent a270742319
commit 7973c163f3
3 changed files with 150 additions and 43 deletions

View File

@@ -1,9 +1,13 @@
# FileReadTool
## Description
The FileReadTool is a versatile component of the crewai_tools package, designed to streamline the process of reading and retrieving content from files. It is particularly useful in scenarios such as batch text file processing, runtime configuration file reading, and data importation for analytics. This tool supports various text-based file formats including `.txt`, `.csv`, `.json`, and adapts its functionality based on the file type, for instance, converting JSON content into a Python dictionary for easy use.
The tool also supports reading specific chunks of a file by specifying a starting line and the number of lines to read, which is helpful when working with large files that don't need to be loaded entirely into memory.
## Installation
Install the crewai_tools package to use the FileReadTool in your projects:
```shell
@@ -11,6 +15,7 @@ pip install 'crewai[tools]'
```
## Example
To get started with the FileReadTool:
```python
@@ -23,7 +28,13 @@ file_read_tool = FileReadTool()
# Initialize the tool with a specific file path, so the agent can only read the content of the specified file
file_read_tool = FileReadTool(file_path='path/to/your/file.txt')
# Read a specific chunk of the file (lines 100-149)
partial_content = file_read_tool.run(file_path='path/to/your/file.txt', start_line=100, line_count=50)
```
## Arguments
- `file_path`: The path to the file you want to read. It accepts both absolute and relative paths. Ensure the file exists and you have the necessary permissions to access it.
- `file_path`: The path to the file you want to read. It accepts both absolute and relative paths. Ensure the file exists and you have the necessary permissions to access it.
- `start_line`: (Optional) The line number to start reading from (1-indexed). Defaults to 1 (the first line).
- `line_count`: (Optional) The number of lines to read. If not provided, reads from the start_line to the end of the file.

View File

@@ -8,6 +8,8 @@ class FileReadToolSchema(BaseModel):
"""Input for FileReadTool."""
# Path of the file to read; required.
file_path: str = Field(..., description="Mandatory file full path to read the file")
# 1-indexed line at which reading starts; defaults to the first line.
start_line: Optional[int] = Field(1, description="Line number to start reading from (1-indexed)")
# Maximum number of lines to return; None means "no limit", i.e. read from
# start_line through the end of the file (not necessarily the entire file).
line_count: Optional[int] = Field(None, description="Number of lines to read. If None, reads from start_line to the end of the file")
class FileReadTool(BaseTool):
@@ -31,10 +33,11 @@ class FileReadTool(BaseTool):
>>> tool = FileReadTool(file_path="/path/to/file.txt")
>>> content = tool.run() # Reads /path/to/file.txt
>>> content = tool.run(file_path="/path/to/other.txt") # Reads other.txt
>>> content = tool.run(file_path="/path/to/file.txt", start_line=100, line_count=50) # Reads lines 100-149
"""
name: str = "Read a file's content"
description: str = "A tool that reads the content of a file. To use this tool, provide a 'file_path' parameter with the path to the file you want to read."
description: str = "A tool that reads the content of a file. To use this tool, provide a 'file_path' parameter with the path to the file you want to read. Optionally, provide 'start_line' to start reading from a specific line and 'line_count' to limit the number of lines read."
args_schema: Type[BaseModel] = FileReadToolSchema
file_path: Optional[str] = None
@@ -47,8 +50,10 @@ class FileReadTool(BaseTool):
**kwargs: Additional keyword arguments passed to BaseTool.
"""
if file_path is not None:
kwargs['description'] = f"A tool that reads file content. The default file is {file_path}, but you can provide a different 'file_path' parameter to read another file."
kwargs["description"] = (
f"A tool that reads file content. The default file is {file_path}, but you can provide a different 'file_path' parameter to read another file. You can also specify 'start_line' and 'line_count' to read specific parts of the file."
)
super().__init__(**kwargs)
self.file_path = file_path
@@ -57,15 +62,34 @@ class FileReadTool(BaseTool):
**kwargs: Any,
) -> str:
file_path = kwargs.get("file_path", self.file_path)
start_line = kwargs.get("start_line", 1)
line_count = kwargs.get("line_count", None)
if file_path is None:
return "Error: No file path provided. Please provide a file path either in the constructor or as an argument."
return (
"Error: No file path provided. Please provide a file path either in the constructor or as an argument."
)
try:
with open(file_path, "r") as file:
return file.read()
if start_line == 1 and line_count is None:
return file.read()
start_idx = max(start_line - 1, 0)
selected_lines = [
line
for i, line in enumerate(file)
if i >= start_idx and (line_count is None or i < start_idx + line_count)
]
if not selected_lines and start_idx > 0:
return f"Error: Start line {start_line} exceeds the number of lines in the file."
return "".join(selected_lines)
except FileNotFoundError:
return f"Error: File not found at path: {file_path}"
except PermissionError:
return f"Error: Permission denied when trying to read file: {file_path}"
except Exception as e:
return f"Error: Failed to read file {file_path}. {str(e)}"
return f"Error: Failed to read file {file_path}. {str(e)}"

View File

@@ -1,4 +1,5 @@
import os
from unittest.mock import mock_open, patch
from crewai_tools import FileReadTool
@@ -22,19 +23,15 @@ def test_file_read_tool_constructor():
def test_file_read_tool_run():
"""Test FileReadTool _run method with file_path at runtime."""
# Create a temporary test file
test_file = "/tmp/test_file.txt"
test_content = "Hello, World!"
with open(test_file, "w") as f:
f.write(test_content)
# Test reading file with runtime file_path
tool = FileReadTool()
result = tool._run(file_path=test_file)
assert result == test_content
# Clean up
os.remove(test_file)
# Use mock_open to mock file operations
with patch("builtins.open", mock_open(read_data=test_content)):
# Test reading file with runtime file_path
tool = FileReadTool()
result = tool._run(file_path=test_file)
assert result == test_content
def test_file_read_tool_error_handling():
@@ -48,41 +45,116 @@ def test_file_read_tool_error_handling():
result = tool._run(file_path="/nonexistent/file.txt")
assert "Error: File not found at path:" in result
# Test permission error (create a file without read permissions)
test_file = "/tmp/no_permission.txt"
with open(test_file, "w") as f:
f.write("test")
os.chmod(test_file, 0o000)
result = tool._run(file_path=test_file)
assert "Error: Permission denied" in result
# Clean up
os.chmod(test_file, 0o666) # Restore permissions to delete
os.remove(test_file)
# Test permission error
with patch("builtins.open", side_effect=PermissionError()):
result = tool._run(file_path="/tmp/no_permission.txt")
assert "Error: Permission denied" in result
def test_file_read_tool_constructor_and_run():
"""Test FileReadTool using both constructor and runtime file paths."""
# Create two test files
test_file1 = "/tmp/test1.txt"
test_file2 = "/tmp/test2.txt"
content1 = "File 1 content"
content2 = "File 2 content"
with open(test_file1, "w") as f1, open(test_file2, "w") as f2:
f1.write(content1)
f2.write(content2)
# First test with content1
with patch("builtins.open", mock_open(read_data=content1)):
tool = FileReadTool(file_path=test_file1)
result = tool._run()
assert result == content1
# Test that constructor file_path works
tool = FileReadTool(file_path=test_file1)
result = tool._run()
assert result == content1
# Then test with content2 (should override constructor file_path)
with patch("builtins.open", mock_open(read_data=content2)):
result = tool._run(file_path=test_file2)
assert result == content2
# Test that runtime file_path overrides constructor
result = tool._run(file_path=test_file2)
assert result == content2
# Clean up
os.remove(test_file1)
os.remove(test_file2)
def test_file_read_tool_chunk_reading():
    """Check that FileReadTool returns only the requested slice of lines."""
    test_file = "/tmp/multiline_test.txt"
    lines = [f"Line {number}\n" for number in range(1, 11)]
    file_content = "".join(lines)

    # Each scenario pairs the extra keyword arguments given to _run with the
    # slice of `lines` that is expected back (the list itself is 0-indexed,
    # while start_line is 1-indexed).
    scenarios = [
        # A specific chunk (lines 3-5).
        ({"start_line": 3, "line_count": 3}, lines[2:5]),
        # From a specific line to the end.
        ({"start_line": 8}, lines[7:]),
        # Default values: the whole file.
        ({}, lines),
        # start_line is 1 but line_count is specified.
        ({"start_line": 1, "line_count": 5}, lines[0:5]),
    ]

    with patch("builtins.open", mock_open(read_data=file_content)):
        tool = FileReadTool()
        for run_kwargs, expected_lines in scenarios:
            result = tool._run(file_path=test_file, **run_kwargs)
            assert result == "".join(expected_lines)
def test_file_read_tool_chunk_error_handling():
    """Check chunk-reading behaviour at and beyond the end of the file."""
    path = "/tmp/short_test.txt"
    lines = ["Line 1\n", "Line 2\n", "Line 3\n"]

    with patch("builtins.open", mock_open(read_data="".join(lines))):
        reader = FileReadTool()

        # A start_line past the last line yields an error message.
        out_of_range = reader._run(file_path=path, start_line=10)
        assert "Error: Start line 10 exceeds the number of lines in the file" in out_of_range

        # A line_count larger than what remains returns line 2 through the end.
        remainder = reader._run(file_path=path, start_line=2, line_count=10)
        assert remainder == "".join(lines[1:])
def test_file_read_tool_zero_or_negative_start_line():
    """Check that a start_line of 0 or below reads from the start of the file."""
    test_file = "/tmp/negative_test.txt"
    lines = [f"Line {number}\n" for number in range(1, 6)]
    file_content = "".join(lines)

    # (start_line, line_count or None when omitted, expected lines)
    scenarios = [
        (0, None, lines),         # start_line = 0 -> entire file
        (0, 3, lines[0:3]),       # start_line = 0, limited -> first 3 lines
        (-5, None, lines),        # negative start_line -> entire file
        (-10, 2, lines[0:2]),     # negative start_line, limited -> first 2 lines
    ]

    with patch("builtins.open", mock_open(read_data=file_content)):
        tool = FileReadTool()
        for start, count, expected_lines in scenarios:
            # line_count is only passed when the scenario specifies it.
            run_kwargs = {"start_line": start}
            if count is not None:
                run_kwargs["line_count"] = count
            result = tool._run(file_path=test_file, **run_kwargs)
            assert result == "".join(expected_lines)