diff --git a/lib/crewai/src/crewai/tools/agent_tools/read_file_tool.py b/lib/crewai/src/crewai/tools/agent_tools/read_file_tool.py
new file mode 100644
index 000000000..97b974c6b
--- /dev/null
+++ b/lib/crewai/src/crewai/tools/agent_tools/read_file_tool.py
@@ -0,0 +1,90 @@
+"""Tool for reading input files provided to the crew."""
+
+from __future__ import annotations
+
+import base64
+from typing import TYPE_CHECKING
+
+from pydantic import BaseModel, Field, PrivateAttr
+
+from crewai.tools.base_tool import BaseTool
+
+
+if TYPE_CHECKING:
+    from crewai.utilities.files import FileInput
+
+
+# Content-type prefixes whose payload is returned as decoded text rather than
+# base64. A tuple so it can be handed straight to ``str.startswith``.
+_TEXT_CONTENT_TYPE_PREFIXES: tuple[str, ...] = (
+    "text/",
+    "application/json",
+    "application/xml",
+    "application/yaml",
+    "application/x-yaml",
+)
+
+
+class ReadFileToolSchema(BaseModel):
+    """Schema for read file tool arguments."""
+
+    file_name: str = Field(..., description="The name of the input file to read")
+
+
+class ReadFileTool(BaseTool):
+    """Tool for reading input files provided to the crew kickoff.
+
+    Provides agents access to files passed via the `files` key in inputs.
+    """
+
+    name: str = "read_file"
+    description: str = (
+        "Read content from an input file by name. "
+        "Returns file content as text for text files, or base64 for binary files."
+    )
+    args_schema: type[BaseModel] = ReadFileToolSchema
+
+    # Files exposed to the agent, keyed by name; None when none have been set.
+    _files: dict[str, FileInput] | None = PrivateAttr(default=None)
+
+    def set_files(self, files: dict[str, FileInput] | None) -> None:
+        """Set available input files.
+
+        Args:
+            files: Dictionary mapping file names to file inputs, or None to
+                clear the currently available files.
+        """
+        self._files = files
+
+    def _run(self, file_name: str, **kwargs: object) -> str:
+        """Read an input file by name.
+
+        Args:
+            file_name: The name of the file to read.
+            **kwargs: Extra arguments; accepted and ignored.
+
+        Returns:
+            The decoded text for text content types, a header line followed by
+            the base64 payload for any other content type, or a plain message
+            when no files are set or the requested name is unknown.
+        """
+        if not self._files:
+            return "No input files available."
+
+        if file_name not in self._files:
+            available = ", ".join(self._files)
+            return f"File '{file_name}' not found. Available files: {available}"
+
+        file_input = self._files[file_name]
+        content = file_input.read()
+        content_type = file_input.content_type
+        filename = file_input.filename or file_name
+
+        if content_type.startswith(_TEXT_CONTENT_TYPE_PREFIXES):
+            # A text content type does not guarantee valid UTF-8; substitute
+            # undecodable bytes instead of raising out of the tool call.
+            return content.decode("utf-8", errors="replace")
+
+        # Binary content - return base64 encoded
+        encoded = base64.b64encode(content).decode("ascii")
+        return f"[Binary file: {filename} ({content_type})]\nBase64: {encoded}"
diff --git a/lib/crewai/tests/tools/agent_tools/test_read_file_tool.py b/lib/crewai/tests/tools/agent_tools/test_read_file_tool.py
new file mode 100644
index 000000000..5f521f974
--- /dev/null
+++ b/lib/crewai/tests/tools/agent_tools/test_read_file_tool.py
@@ -0,0 +1,148 @@
+"""Unit tests for ReadFileTool."""
+
+import base64
+
+import pytest
+
+from crewai.tools.agent_tools.read_file_tool import ReadFileTool
+from crewai.utilities.files import ImageFile, PDFFile, TextFile
+
+
+class _StubTextFile:
+    """Stand-in file input that always reports a text content type.
+
+    Lets a test pair arbitrary bytes with ``text/plain`` without depending on
+    how the real file classes determine their content type.
+    """
+
+    content_type = "text/plain"
+    filename = None
+
+    def __init__(self, data: bytes) -> None:
+        self._data = data
+
+    def read(self) -> bytes:
+        """Return the bytes supplied at construction."""
+        return self._data
+
+
+class TestReadFileTool:
+    """Tests for ReadFileTool."""
+
+    def setup_method(self) -> None:
+        """Set up test fixtures."""
+        self.tool = ReadFileTool()
+
+    def test_tool_metadata(self) -> None:
+        """Test tool has correct name and description."""
+        assert self.tool.name == "read_file"
+        assert "Read content from an input file" in self.tool.description
+
+    def test_run_no_files_available(self) -> None:
+        """Test _run returns message when no files are set."""
+        result = self.tool._run(file_name="any.txt")
+        assert result == "No input files available."
+
+    def test_run_file_not_found(self) -> None:
+        """Test _run returns message when file not found."""
+        self.tool.set_files({"doc.txt": TextFile(source=b"content")})
+
+        result = self.tool._run(file_name="missing.txt")
+
+        assert "File 'missing.txt' not found" in result
+        assert "doc.txt" in result  # Lists available files
+
+    def test_run_text_file(self) -> None:
+        """Test reading a text file returns decoded content."""
+        text_content = "Hello, this is text content!"
+        self.tool.set_files({"readme.txt": TextFile(source=text_content.encode())})
+
+        result = self.tool._run(file_name="readme.txt")
+
+        assert result == text_content
+
+    def test_run_json_file(self) -> None:
+        """Test reading a JSON file returns decoded content."""
+        json_content = '{"key": "value"}'
+        self.tool.set_files({"data.json": TextFile(source=json_content.encode())})
+
+        result = self.tool._run(file_name="data.json")
+
+        assert result == json_content
+
+    def test_run_binary_file_returns_base64(self) -> None:
+        """Test reading a binary file returns base64 encoded content."""
+        # Minimal valid PNG structure for proper MIME detection
+        png_bytes = (
+            b"\x89PNG\r\n\x1a\n"
+            b"\x00\x00\x00\rIHDR"
+            b"\x00\x00\x00\x01\x00\x00\x00\x01\x08\x02\x00\x00\x00"
+            b"\x90wS\xde"
+            b"\x00\x00\x00\x00IEND\xaeB`\x82"
+        )
+        self.tool.set_files({"image.png": ImageFile(source=png_bytes)})
+
+        result = self.tool._run(file_name="image.png")
+
+        assert "[Binary file:" in result
+        assert "image/png" in result
+        assert "Base64:" in result
+
+        # Verify base64 can be decoded
+        b64_part = result.split("Base64: ")[1]
+        decoded = base64.b64decode(b64_part)
+        assert decoded == png_bytes
+
+    def test_run_pdf_file_returns_base64(self) -> None:
+        """Test reading a PDF file returns base64 encoded content."""
+        pdf_bytes = b"%PDF-1.4 some content here"
+        self.tool.set_files({"doc.pdf": PDFFile(source=pdf_bytes)})
+
+        result = self.tool._run(file_name="doc.pdf")
+
+        assert "[Binary file:" in result
+        assert "application/pdf" in result
+
+    def test_set_files_none(self) -> None:
+        """Test setting files to None."""
+        self.tool.set_files({"doc": TextFile(source=b"content")})
+        self.tool.set_files(None)
+
+        result = self.tool._run(file_name="doc")
+
+        assert result == "No input files available."
+
+    def test_run_multiple_files(self) -> None:
+        """Test tool can access multiple files."""
+        self.tool.set_files({
+            "file1.txt": TextFile(source=b"content 1"),
+            "file2.txt": TextFile(source=b"content 2"),
+            "file3.txt": TextFile(source=b"content 3"),
+        })
+
+        assert self.tool._run(file_name="file1.txt") == "content 1"
+        assert self.tool._run(file_name="file2.txt") == "content 2"
+        assert self.tool._run(file_name="file3.txt") == "content 3"
+
+    def test_run_with_kwargs(self) -> None:
+        """Test _run ignores extra kwargs."""
+        self.tool.set_files({"doc.txt": TextFile(source=b"content")})
+
+        result = self.tool._run(file_name="doc.txt", extra_arg="ignored")
+
+        assert result == "content"
+
+    def test_args_schema(self) -> None:
+        """Test that args_schema is properly defined."""
+        schema = self.tool.args_schema
+
+        assert "file_name" in schema.model_fields
+        assert schema.model_fields["file_name"].is_required()
+
+    def test_run_text_file_with_invalid_utf8(self) -> None:
+        """Test undecodable bytes in a text file are replaced, not raised."""
+        self.tool.set_files({"bad.txt": _StubTextFile(b"ok \xff end")})
+
+        result = self.tool._run(file_name="bad.txt")
+
+        assert result == "ok \ufffd end"