feat: add ReadFileTool for agent file access

- Create read_file tool for agents to access attached files
- Support reading by file name from crew/task file store
- Add unit tests for ReadFileTool
This commit is contained in:
Greyson LaLonde
2026-01-21 20:12:11 -05:00
parent 4ed5e4ca0e
commit b035aa8947
2 changed files with 202 additions and 0 deletions

View File

@@ -0,0 +1,80 @@
"""Tool for reading input files provided to the crew."""
from __future__ import annotations
import base64
from typing import TYPE_CHECKING
from pydantic import BaseModel, Field, PrivateAttr
from crewai.tools.base_tool import BaseTool
if TYPE_CHECKING:
from crewai.utilities.files import FileInput
class ReadFileToolSchema(BaseModel):
    """Schema for read file tool arguments."""

    # Required (``...`` means no default): the name of the input file to read.
    # ReadFileTool._run receives this value as its ``file_name`` argument.
    file_name: str = Field(..., description="The name of the input file to read")
class ReadFileTool(BaseTool):
    """Tool for reading input files provided to the crew kickoff.

    Provides agents access to files passed via the `files` key in inputs.
    Files are looked up by name in the mapping installed via ``set_files``.
    """

    name: str = "read_file"
    description: str = (
        "Read content from an input file by name. "
        "Returns file content as text for text files, or base64 for binary files."
    )
    args_schema: type[BaseModel] = ReadFileToolSchema

    # Mapping of file name -> file input; stays None until set_files() is called.
    _files: dict[str, FileInput] | None = PrivateAttr(default=None)

    def set_files(self, files: dict[str, FileInput] | None) -> None:
        """Set available input files.

        Args:
            files: Dictionary mapping file names to file inputs, or None to
                clear the currently available files.
        """
        self._files = files

    def _run(self, file_name: str, **kwargs: object) -> str:
        """Read an input file by name.

        Args:
            file_name: The name of the file to read.
            **kwargs: Extra arguments; accepted and ignored.

        Returns:
            File content as text for text files, or base64 encoded for binary.
            When no files are set, or the name is unknown, a human-readable
            message is returned instead of raising.
        """
        # Covers both "never set" (None) and an empty mapping.
        if not self._files:
            return "No input files available."

        if file_name not in self._files:
            available = ", ".join(self._files)
            return f"File '{file_name}' not found. Available files: {available}"

        file_input = self._files[file_name]
        content = file_input.read()
        content_type = file_input.content_type

        # Content-type prefixes that are treated as text.
        text_types = (
            "text/",
            "application/json",
            "application/xml",
            "application/x-yaml",
        )
        # str.startswith accepts a tuple, so one call checks every prefix.
        if content_type.startswith(text_types):
            # A text content type does not guarantee UTF-8 bytes (e.g. a
            # Latin-1 file). Substitute U+FFFD for undecodable bytes so the
            # agent still gets the readable content instead of an exception.
            return content.decode("utf-8", errors="replace")

        # Binary content - return base64 encoded, labelled with the file's own
        # name when it has one, otherwise the name it was requested under.
        filename = file_input.filename or file_name
        encoded = base64.b64encode(content).decode("ascii")
        return f"[Binary file: {filename} ({content_type})]\nBase64: {encoded}"

View File

@@ -0,0 +1,122 @@
"""Unit tests for ReadFileTool."""
import base64
import pytest
from crewai.tools.agent_tools.read_file_tool import ReadFileTool
from crewai.utilities.files import ImageFile, PDFFile, TextFile
class TestReadFileTool:
    """Behavioural checks for ReadFileTool."""

    def setup_method(self) -> None:
        """Give every test its own tool instance with no files attached."""
        self.tool = ReadFileTool()

    def test_tool_metadata(self) -> None:
        """The tool exposes the expected name and description text."""
        tool = self.tool
        assert tool.name == "read_file"
        assert "Read content from an input file" in tool.description

    def test_run_no_files_available(self) -> None:
        """Without any files set, _run reports that none are available."""
        assert self.tool._run(file_name="any.txt") == "No input files available."

    def test_run_file_not_found(self) -> None:
        """An unknown name yields a not-found message listing known files."""
        attached = {"doc.txt": TextFile(source=b"content")}
        self.tool.set_files(attached)

        message = self.tool._run(file_name="missing.txt")

        assert "File 'missing.txt' not found" in message
        # The message enumerates the files that are available.
        assert "doc.txt" in message

    def test_run_text_file(self) -> None:
        """A text file comes back as its decoded content."""
        expected = "Hello, this is text content!"
        self.tool.set_files({"readme.txt": TextFile(source=expected.encode())})

        assert self.tool._run(file_name="readme.txt") == expected

    def test_run_json_file(self) -> None:
        """A JSON payload comes back as its decoded content."""
        expected = '{"key": "value"}'
        self.tool.set_files({"data.json": TextFile(source=expected.encode())})

        assert self.tool._run(file_name="data.json") == expected

    def test_run_binary_file_returns_base64(self) -> None:
        """A binary image is returned as a labelled base64 payload."""
        # Minimal PNG layout (signature, IHDR, IEND) for MIME detection.
        png_bytes = b"".join(
            [
                b"\x89PNG\r\n\x1a\n",
                b"\x00\x00\x00\rIHDR",
                b"\x00\x00\x00\x01\x00\x00\x00\x01\x08\x02\x00\x00\x00",
                b"\x90wS\xde",
                b"\x00\x00\x00\x00IEND\xaeB`\x82",
            ]
        )
        self.tool.set_files({"image.png": ImageFile(source=png_bytes)})

        output = self.tool._run(file_name="image.png")

        for fragment in ("[Binary file:", "image/png", "Base64:"):
            assert fragment in output

        # The payload after the marker must round-trip to the original bytes.
        payload = output.split("Base64: ")[1]
        assert base64.b64decode(payload) == png_bytes

    def test_run_pdf_file_returns_base64(self) -> None:
        """A PDF is reported as a binary file with its content type."""
        self.tool.set_files(
            {"doc.pdf": PDFFile(source=b"%PDF-1.4 some content here")}
        )

        output = self.tool._run(file_name="doc.pdf")

        assert "[Binary file:" in output
        assert "application/pdf" in output

    def test_set_files_none(self) -> None:
        """Passing None to set_files clears previously attached files."""
        self.tool.set_files({"doc": TextFile(source=b"content")})
        self.tool.set_files(None)

        assert self.tool._run(file_name="doc") == "No input files available."

    def test_run_multiple_files(self) -> None:
        """Each of several attached files is readable by its own name."""
        indices = (1, 2, 3)
        self.tool.set_files(
            {
                f"file{i}.txt": TextFile(source=f"content {i}".encode())
                for i in indices
            }
        )

        for i in indices:
            assert self.tool._run(file_name=f"file{i}.txt") == f"content {i}"

    def test_run_with_kwargs(self) -> None:
        """Unexpected keyword arguments passed to _run are ignored."""
        self.tool.set_files({"doc.txt": TextFile(source=b"content")})

        output = self.tool._run(file_name="doc.txt", extra_arg="ignored")

        assert output == "content"

    def test_args_schema(self) -> None:
        """The args schema declares file_name as a required field."""
        fields = self.tool.args_schema.model_fields
        assert "file_name" in fields
        assert fields["file_name"].is_required()