Compare commits

...

9 Commits

Author SHA1 Message Date
Lorenze Jay
46faa2a0be Merge branch 'main' into lorenze/feat/file-discovery-tools 2026-02-09 11:28:59 -08:00
github-actions[bot]
6771629745 chore: update tool specifications 2026-02-04 22:12:01 +00:00
lorenzejay
6996a37e23 move to crewai-tools 2026-02-04 12:34:13 -08:00
lorenzejay
f69c9c3f7b Merge branch 'lorenze/feat/grep-tool' of github.com:crewAIInc/crewAI into lorenze/feat/file-discovery-tools 2026-02-04 11:58:33 -08:00
github-actions[bot]
c9971a7418 chore: update tool specifications 2026-02-04 19:52:01 +00:00
lorenzejay
f04bedc9ab moved to tools 2026-02-04 11:50:43 -08:00
lorenzejay
7e16744361 linted 2026-02-04 11:40:02 -08:00
lorenzejay
1078dbd886 feat: introduce GlobTool for file pattern matching
- Added GlobTool to facilitate finding files that match specified glob patterns.
- Enhanced agent_tools module to include GlobTool and GrepTool.
- Implemented comprehensive functionality for recursive file searching, output formatting, and handling of hidden files.
- Created unit tests for GlobTool to ensure reliability and correctness in various scenarios.

This addition complements existing tools and enhances the file management capabilities within the CrewAI framework.
2026-02-04 11:39:44 -08:00
Lorenze Jay
5a14007511 native support for grep 2026-02-04 10:28:35 -08:00
12 changed files with 1907 additions and 42 deletions

View File

@@ -77,6 +77,8 @@ from crewai_tools.tools.generate_crewai_automation_tool.generate_crewai_automati
GenerateCrewaiAutomationTool,
)
from crewai_tools.tools.github_search_tool.github_search_tool import GithubSearchTool
from crewai_tools.tools.glob_tool.glob_tool import GlobTool
from crewai_tools.tools.grep_tool.grep_tool import GrepTool
from crewai_tools.tools.hyperbrowser_load_tool.hyperbrowser_load_tool import (
HyperbrowserLoadTool,
)
@@ -230,6 +232,8 @@ __all__ = [
"FirecrawlSearchTool",
"GenerateCrewaiAutomationTool",
"GithubSearchTool",
"GlobTool",
"GrepTool",
"HyperbrowserLoadTool",
"InvokeCrewAIAutomationTool",
"JSONSearchTool",

View File

@@ -66,6 +66,8 @@ from crewai_tools.tools.generate_crewai_automation_tool.generate_crewai_automati
GenerateCrewaiAutomationTool,
)
from crewai_tools.tools.github_search_tool.github_search_tool import GithubSearchTool
from crewai_tools.tools.glob_tool.glob_tool import GlobTool
from crewai_tools.tools.grep_tool.grep_tool import GrepTool
from crewai_tools.tools.hyperbrowser_load_tool.hyperbrowser_load_tool import (
HyperbrowserLoadTool,
)
@@ -214,6 +216,8 @@ __all__ = [
"FirecrawlSearchTool",
"GenerateCrewaiAutomationTool",
"GithubSearchTool",
"GlobTool",
"GrepTool",
"HyperbrowserLoadTool",
"InvokeCrewAIAutomationTool",
"JSONSearchTool",

View File

@@ -1,28 +1,61 @@
"""Tool for reading file contents from disk with line number support."""
from __future__ import annotations
from pathlib import Path
from typing import Any
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
BINARY_CHECK_SIZE = 8192
MAX_LINE_LENGTH = 500
DEFAULT_LINE_LIMIT = 500
class FileReadToolSchema(BaseModel):
    """Input schema for FileReadTool.

    Note: this block resolves leftover pre-diff lines that were interleaved
    with the new field definitions; only the new-side definitions are kept.
    """

    file_path: str = Field(..., description="Mandatory file full path to read the file")
    offset: int | None = Field(
        None,
        description=(
            "Line number to start reading from. Positive values are 1-indexed from "
            "the start. Negative values count from the end (e.g., -10 reads last 10 lines). "
            "If None, reads from the beginning."
        ),
    )
    limit: int | None = Field(
        None,
        description=(
            "Maximum number of lines to read. If None, reads up to the default limit "
            f"({DEFAULT_LINE_LIMIT} lines) for large files, or entire file for small files."
        ),
    )
    include_line_numbers: bool = Field(
        True,
        description="Whether to prefix each line with its line number (format: 'LINE_NUMBER|CONTENT')",
    )
    # Legacy parameters kept for backward compatibility; mapped onto
    # offset/limit inside FileReadTool._run.
    start_line: int | None = Field(
        None,
        description="[DEPRECATED: Use 'offset' instead] Line number to start reading from (1-indexed).",
    )
    line_count: int | None = Field(
        None,
        description="[DEPRECATED: Use 'limit' instead] Number of lines to read.",
    )
class FileReadTool(BaseTool):
"""A tool for reading file contents.
"""A tool for reading file contents with line number support.
This tool inherits its schema handling from BaseTool to avoid recursive schema
definition issues. The args_schema is set to FileReadToolSchema which defines
the required file_path parameter. The schema should not be overridden in the
constructor as it would break the inheritance chain and cause infinite loops.
This tool provides Claude Code-like file reading capabilities:
- Line number prefixes for easy reference
- Offset/limit support for reading specific portions of large files
- Negative offset support for reading from end of file
- Binary file detection
- File metadata (total lines) in response header
The tool supports two ways of specifying the file path:
1. At construction time via the file_path parameter
@@ -34,16 +67,23 @@ class FileReadTool(BaseTool):
**kwargs: Additional keyword arguments passed to BaseTool.
Example:
>>> tool = FileReadTool(file_path="/path/to/file.txt")
>>> content = tool.run() # Reads /path/to/file.txt
>>> content = tool.run(file_path="/path/to/other.txt") # Reads other.txt
>>> tool = FileReadTool()
>>> content = tool.run(file_path="/path/to/file.txt") # Reads entire file
>>> content = tool.run(
... file_path="/path/to/file.txt", start_line=100, line_count=50
... ) # Reads lines 100-149
... file_path="/path/to/file.txt", offset=100, limit=50
... ) # Lines 100-149
>>> content = tool.run(
... file_path="/path/to/file.txt", offset=-20
... ) # Last 20 lines
"""
name: str = "Read a file's content"
description: str = "A tool that reads the content of a file. To use this tool, provide a 'file_path' parameter with the path to the file you want to read. Optionally, provide 'start_line' to start reading from a specific line and 'line_count' to limit the number of lines read."
name: str = "read_file"
description: str = (
"Read content from a file on disk. Returns file content with line numbers "
"prefixed (format: 'LINE_NUMBER|CONTENT'). Use 'offset' to start from a "
"specific line (negative values read from end), and 'limit' to control "
"how many lines to read. For large files, reads are automatically limited."
)
args_schema: type[BaseModel] = FileReadToolSchema
file_path: str | None = None
@@ -57,46 +97,152 @@ class FileReadTool(BaseTool):
"""
if file_path is not None:
kwargs["description"] = (
f"A tool that reads file content. The default file is {file_path}, but you can provide a different 'file_path' parameter to read another file. You can also specify 'start_line' and 'line_count' to read specific parts of the file."
f"Read content from a file. The default file is {file_path}, but you "
"can provide a different 'file_path' parameter. Use 'offset' to start "
"from a specific line and 'limit' to control the number of lines read."
)
super().__init__(**kwargs)
self.file_path = file_path
def _is_binary_file(self, file_path: Path) -> bool:
"""Check if a file is binary by looking for null bytes.
Args:
file_path: Path to the file.
Returns:
True if the file appears to be binary.
"""
try:
with open(file_path, "rb") as f:
chunk = f.read(BINARY_CHECK_SIZE)
return b"\x00" in chunk
except (OSError, PermissionError):
return True
def _count_lines(self, file_path: Path) -> int:
"""Count total lines in a file efficiently.
Args:
file_path: Path to the file.
Returns:
Total number of lines in the file.
"""
try:
with open(file_path, "rb") as f:
return sum(1 for _ in f)
except (OSError, PermissionError):
return 0
def _run(
self,
file_path: str | None = None,
start_line: int | None = 1,
offset: int | None = None,
limit: int | None = None,
include_line_numbers: bool = True,
start_line: int | None = None,
line_count: int | None = None,
) -> str:
"""Read file contents with optional line range.
Args:
file_path: Path to the file to read.
offset: Line to start from (1-indexed, negative counts from end).
limit: Maximum lines to read.
include_line_numbers: Whether to prefix lines with numbers.
start_line: Legacy parameter (maps to offset).
line_count: Legacy parameter (maps to limit).
Returns:
File content with metadata header.
"""
if start_line is not None and offset is None:
offset = start_line
if line_count is not None and limit is None:
limit = line_count
file_path = file_path or self.file_path
start_line = start_line or 1
line_count = line_count or None
if file_path is None:
return "Error: No file path provided. Please provide a file path either in the constructor or as an argument."
try:
with open(file_path, "r") as file:
if start_line == 1 and line_count is None:
return file.read()
path = Path(file_path)
start_idx = max(start_line - 1, 0)
selected_lines = [
line
for i, line in enumerate(file)
if i >= start_idx
and (line_count is None or i < start_idx + line_count)
]
if not selected_lines and start_idx > 0:
return f"Error: Start line {start_line} exceeds the number of lines in the file."
return "".join(selected_lines)
except FileNotFoundError:
if not path.exists():
return f"Error: File not found at path: {file_path}"
if path.is_dir():
return f"Error: Path is a directory, not a file: {file_path}"
if self._is_binary_file(path):
file_size = path.stat().st_size
return (
f"Error: '{file_path}' appears to be a binary file ({file_size} bytes). "
"Binary files cannot be read as text. Use a specialized tool for binary content."
)
try:
total_lines = self._count_lines(path)
if total_lines == 0:
return f"File: {file_path}\nTotal lines: 0\n\n(Empty file)"
if offset is None:
start_idx = 0
elif offset < 0:
start_idx = max(0, total_lines + offset)
else:
start_idx = max(0, offset - 1)
if limit is None:
if total_lines > DEFAULT_LINE_LIMIT and offset is None:
effective_limit = DEFAULT_LINE_LIMIT
else:
effective_limit = total_lines - start_idx
else:
effective_limit = limit
end_idx = min(start_idx + effective_limit, total_lines)
with open(path, encoding="utf-8", errors="replace") as f:
lines: list[str] = []
for i, line in enumerate(f):
if i < start_idx:
continue
if i >= end_idx:
break
line_content = line.rstrip("\n\r")
if len(line_content) > MAX_LINE_LENGTH:
line_content = line_content[:MAX_LINE_LENGTH] + "..."
if include_line_numbers:
line_num = i + 1 # 1-indexed
lines.append(f"{line_num:6}|{line_content}")
else:
lines.append(line_content)
header_parts = [f"File: {file_path}", f"Total lines: {total_lines}"]
if start_idx > 0 or end_idx < total_lines:
header_parts.append(f"Showing lines: {start_idx + 1}-{end_idx}")
if end_idx < total_lines and limit is None and offset is None:
header_parts.append(
"(File truncated. Use 'offset' and 'limit' to read more.)"
)
header = "\n".join(header_parts)
content = "\n".join(lines)
return f"{header}\n\n{content}"
except PermissionError:
return f"Error: Permission denied when trying to read file: {file_path}"
except UnicodeDecodeError as e:
return f"Error: Failed to decode file {file_path} as text: {e!s}"
except Exception as e:
return f"Error: Failed to read file {file_path}. {e!s}"

View File

@@ -0,0 +1,4 @@
from crewai_tools.tools.glob_tool.glob_tool import GlobTool

# Public API of the glob_tool package.
__all__ = ["GlobTool"]

View File

@@ -0,0 +1,255 @@
"""Tool for finding files matching glob patterns."""
from __future__ import annotations
from dataclasses import dataclass
import os
from pathlib import Path
from typing import Literal
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
# Cap on the number of matches collected before the search stops.
MAX_FILES = 1000
# Cap on the formatted output size so results stay LLM-friendly.
MAX_OUTPUT_CHARS = 30_000
# Directory names excluded from results, compared by exact path-component name.
# NOTE(review): "*.egg-info" is a glob-style entry, but the membership test is
# an exact-name comparison, so real "<pkg>.egg-info" directories will not be
# skipped — confirm intended handling.
SKIP_DIRS = frozenset(
    {
        ".git",
        "__pycache__",
        "node_modules",
        ".venv",
        "venv",
        ".tox",
        ".mypy_cache",
        ".pytest_cache",
        ".ruff_cache",
        ".coverage",
        "dist",
        "build",
        ".eggs",
        "*.egg-info",
    }
)
@dataclass
class FileInfo:
    """Information about a matched file."""

    path: Path  # matched filesystem path as produced by Path.glob
    size: int  # size in bytes (0 for directories)
    is_dir: bool  # True when the match is a directory
class GlobToolSchema(BaseModel):
    """Schema for glob tool arguments."""

    pattern: str = Field(
        ...,
        description=(
            "Glob pattern to match files. Examples: '*.py' (Python files), "
            "'**/*.yaml' (all YAML files recursively), 'src/**/*.ts' (TypeScript in src), "
            "'test_*.py' (test files). Patterns not starting with '**/' are auto-prefixed for recursive search."
        ),
    )
    path: str | None = Field(
        default=None,
        description="Directory to search in. Defaults to current working directory.",
    )
    output_mode: Literal["paths", "tree", "detailed"] = Field(
        default="paths",
        description=(
            "Output format: 'paths' shows file paths one per line, "
            "'tree' shows directory tree structure, "
            "'detailed' includes file sizes."
        ),
    )
    include_hidden: bool = Field(
        default=False,
        description="Whether to include hidden files and directories (starting with '.').",
    )
    # NOTE(review): setting both dirs_only=True and files_only=True filters out
    # every match; callers passing dirs_only=True must also pass
    # files_only=False — confirm whether a validator should enforce this.
    dirs_only: bool = Field(
        default=False,
        description="If True, only match directories, not files.",
    )
    files_only: bool = Field(
        default=True,
        description="If True (default), only match files, not directories.",
    )
class GlobTool(BaseTool):
    """Tool for finding files matching glob patterns.

    Recursively searches for files matching a glob pattern within a directory.
    Useful for discovering files by name, extension, or path pattern.
    Complements GrepTool which searches by file content.

    Example:
        >>> tool = GlobTool()
        >>> result = tool.run(pattern="*.py", path="/path/to/project")
        >>> result = tool.run(pattern="**/*.yaml", output_mode="detailed")
    """

    name: str = "glob"
    description: str = (
        "Find files matching a glob pattern. Use to discover files by name or extension. "
        "Examples: '*.py' finds all Python files, '**/*.yaml' finds YAML files recursively, "
        "'test_*.py' finds test files. Returns matching file paths sorted by modification time."
    )
    args_schema: type[BaseModel] = GlobToolSchema

    def _run(
        self,
        pattern: str,
        path: str | None = None,
        output_mode: Literal["paths", "tree", "detailed"] = "paths",
        include_hidden: bool = False,
        dirs_only: bool = False,
        files_only: bool = True,
        **kwargs: object,
    ) -> str:
        """Find files matching a glob pattern.

        Args:
            pattern: Glob pattern to match.
            path: Directory to search in. Defaults to cwd.
            output_mode: Output format (paths, tree, detailed).
            include_hidden: Whether to include hidden files.
            dirs_only: Only match directories.
            files_only: Only match files (default True).

        Returns:
            Formatted list of matching paths, or an 'Error: ...' string.
        """
        # Resolve search path
        search_path = Path(path) if path else Path(os.getcwd())
        if not search_path.exists():
            return f"Error: Path '{search_path}' does not exist."
        if not search_path.is_dir():
            return f"Error: Path '{search_path}' is not a directory."
        # Bare filename patterns ('*.py') are auto-prefixed with '**/' so the
        # search is recursive by default.
        normalized_pattern = pattern
        if not pattern.startswith("**/") and not pattern.startswith("/"):
            if "/" not in pattern:
                normalized_pattern = f"**/{pattern}"
        matches: list[FileInfo] = []
        try:
            for match_path in search_path.glob(normalized_pattern):
                rel_parts = match_path.relative_to(search_path).parts
                if not include_hidden and any(
                    part.startswith(".") for part in rel_parts
                ):
                    continue
                if any(part in SKIP_DIRS for part in rel_parts):
                    continue
                is_dir = match_path.is_dir()
                if dirs_only and not is_dir:
                    continue
                if files_only and is_dir:
                    continue
                try:
                    size = match_path.stat().st_size if not is_dir else 0
                    matches.append(FileInfo(path=match_path, size=size, is_dir=is_dir))
                except (OSError, PermissionError):
                    # Unreadable entries are silently skipped.
                    continue
                if len(matches) >= MAX_FILES:
                    break
        except Exception as e:
            return f"Error: Failed to search with pattern '{pattern}': {e!s}"
        if not matches:
            return f"No files found matching pattern '{pattern}' in {search_path}"
        # Newest-first by mtime; fall back to a stable path sort if stat fails.
        try:
            matches.sort(key=lambda f: f.path.stat().st_mtime, reverse=True)
        except (OSError, PermissionError):
            matches.sort(key=lambda f: str(f.path))
        if output_mode == "detailed":
            output = self._format_detailed(matches, search_path)
        elif output_mode == "tree":
            output = self._format_tree(matches, search_path)
        else:
            output = self._format_paths(matches, search_path)
        summary = f"Found {len(matches)} file(s) matching '{pattern}'"
        if len(matches) >= MAX_FILES:
            summary += f" (limited to {MAX_FILES})"
        result = f"{summary}\n\n{output}"
        if len(result) > MAX_OUTPUT_CHARS:
            result = (
                result[:MAX_OUTPUT_CHARS]
                + "\n\n... Output truncated. Use a more specific pattern."
            )
        return result

    def _format_paths(self, matches: list[FileInfo], base_path: Path) -> str:
        """Format as simple list of paths, one per line."""
        return "\n".join(str(f.path) for f in matches)

    def _format_detailed(self, matches: list[FileInfo], base_path: Path) -> str:
        """Format with human-readable file sizes and base-relative paths."""
        lines: list[str] = []
        for f in matches:
            size_str = self._format_size(f.size) if not f.is_dir else "<dir>"
            rel_path = (
                f.path.relative_to(base_path)
                if f.path.is_relative_to(base_path)
                else f.path
            )
            lines.append(f"{size_str:>10} {rel_path}")
        return "\n".join(lines)

    def _format_tree(self, matches: list[FileInfo], base_path: Path) -> str:
        """Format as directory tree structure."""
        # Group matched names by parent directory (relative to base_path).
        tree: dict[str, list[str]] = {}
        for f in matches:
            try:
                rel_path = f.path.relative_to(base_path)
            except ValueError:
                rel_path = f.path
            parent = str(rel_path.parent) if rel_path.parent != Path(".") else "."
            tree.setdefault(parent, []).append(
                rel_path.name + ("/" if f.is_dir else "")
            )
        # Format tree output
        lines: list[str] = [str(base_path)]
        for directory in sorted(tree.keys()):
            if directory != ".":
                lines.append(f"  {directory}/")
            for filename in sorted(tree[directory]):
                # Bug fix: emit the actual filename; the previous version
                # appended a literal placeholder and never used `filename`.
                indent = "    " if directory != "." else "  "
                lines.append(f"{indent}{filename}")
        return "\n".join(lines)

    def _format_size(self, size: int) -> str:
        """Format file size in human-readable form (B, KB, MB, GB, TB)."""
        size_float = float(size)
        for unit in ["B", "KB", "MB", "GB"]:
            if size_float < 1024:
                # Whole bytes; one decimal for larger units.
                return (
                    f"{size_float:.0f}{unit}"
                    if unit == "B"
                    else f"{size_float:.1f}{unit}"
                )
            size_float /= 1024
        return f"{size_float:.1f}TB"

View File

@@ -0,0 +1,3 @@
from crewai_tools.tools.grep_tool.grep_tool import GrepTool

# Public API of the grep_tool package.
__all__ = ["GrepTool"]

View File

@@ -0,0 +1,340 @@
"""Tool for searching file contents on disk using regex patterns."""
from __future__ import annotations
import os
import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import Literal
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
# Cap on the formatted output size so results stay LLM-friendly.
MAX_OUTPUT_CHARS = 50_000
# Cap on the number of files collected before the directory walk stops.
MAX_FILES = 10_000
# Per-file cap on recorded matching lines.
MAX_MATCHES_PER_FILE = 200
# Matched/context lines longer than this are truncated with '...'.
MAX_LINE_LENGTH = 500
# Bytes sniffed from the head of a file for binary detection.
BINARY_CHECK_SIZE = 8192
# Directory names skipped during recursive search (exact component match).
SKIP_DIRS = frozenset(
    {
        ".git",
        "__pycache__",
        "node_modules",
        ".venv",
        "venv",
        ".tox",
        ".mypy_cache",
        ".pytest_cache",
    }
)
@dataclass
class MatchLine:
    """A single line from a search result."""

    line_number: int  # 1-indexed line number within the file
    text: str  # line content, newline stripped, truncated to MAX_LINE_LENGTH
    is_match: bool  # True for match, False for context line
@dataclass
class FileSearchResult:
    """Search results for a single file."""

    file_path: Path  # path of the searched file
    # Groups of contiguous matched/context lines, in file order.
    matches: list[list[MatchLine]] = field(default_factory=list)
    # Number of matching lines found (capped at MAX_MATCHES_PER_FILE).
    match_count: int = 0
class GrepToolSchema(BaseModel):
    """Schema for grep tool arguments."""

    pattern: str = Field(
        ..., description="Regex pattern to search for in file contents"
    )
    path: str | None = Field(
        default=None,
        description="File or directory to search in. Defaults to current working directory.",
    )
    # NOTE(review): the description advertises brace expansion ('*.{ts,tsx}'),
    # but matching is done with pathlib's rglob, which does not expand braces —
    # confirm intended behavior.
    glob_pattern: str | None = Field(
        default=None,
        description="Glob pattern to filter files (e.g. '*.py', '*.{ts,tsx}')",
    )
    output_mode: Literal["content", "files_with_matches", "count"] = Field(
        default="content",
        description="Output mode: 'content' shows matching lines, 'files_with_matches' shows only file paths, 'count' shows match counts per file",
    )
    case_insensitive: bool = Field(
        default=False,
        description="Whether to perform case-insensitive matching",
    )
    context_lines: int = Field(
        default=0,
        description="Number of lines to show before and after each match",
    )
    include_line_numbers: bool = Field(
        default=True,
        description="Whether to prefix matching lines with line numbers",
    )
class GrepTool(BaseTool):
    """Tool for searching file contents on disk using regex patterns.

    Recursively searches files in a directory for lines matching a regex pattern.
    Supports glob filtering, context lines, and multiple output modes.

    Example:
        >>> tool = GrepTool()
        >>> result = tool.run(pattern="def.*main", path="/path/to/project")
        >>> result = tool.run(
        ...     pattern="TODO",
        ...     path="/path/to/project",
        ...     glob_pattern="*.py",
        ...     context_lines=2,
        ... )
    """

    name: str = "Search file contents"
    description: str = (
        "A tool that searches file contents on disk using regex patterns. "
        "Recursively searches files in a directory for matching lines. "
        "Returns matching content with line numbers, file paths only, or match counts."
    )
    args_schema: type[BaseModel] = GrepToolSchema

    def _run(
        self,
        pattern: str,
        path: str | None = None,
        glob_pattern: str | None = None,
        output_mode: Literal["content", "files_with_matches", "count"] = "content",
        case_insensitive: bool = False,
        context_lines: int = 0,
        include_line_numbers: bool = True,
        **kwargs: object,
    ) -> str:
        """Search files for a regex pattern.

        Args:
            pattern: Regex pattern to search for.
            path: File or directory to search. Defaults to cwd.
            glob_pattern: Glob pattern to filter files.
            output_mode: What to return.
            case_insensitive: Case-insensitive matching.
            context_lines: Lines of context around matches.
            include_line_numbers: Prefix lines with line numbers.

        Returns:
            Formatted search results as a string.
        """
        # Resolve search path
        search_path = Path(path) if path else Path(os.getcwd())
        if not search_path.exists():
            return f"Error: Path '{search_path}' does not exist."
        # Compile regex once, up front; report bad patterns as a tool error.
        flags = re.IGNORECASE if case_insensitive else 0
        try:
            compiled = re.compile(pattern, flags)
        except re.error as e:
            return f"Error: Invalid regex pattern '{pattern}': {e}"
        # Collect files
        files = self._collect_files(search_path, glob_pattern)
        # Search each file
        results: list[FileSearchResult] = []
        for file_path in files:
            result = self._search_file(file_path, compiled, context_lines)
            if result is not None:
                results.append(result)
        if not results:
            return "No matches found."
        # Format output
        if output_mode == "files_with_matches":
            output = self._format_files_with_matches(results)
        elif output_mode == "count":
            output = self._format_count(results)
        else:
            output = self._format_content(results, include_line_numbers)
        # Truncate if needed
        if len(output) > MAX_OUTPUT_CHARS:
            output = (
                output[:MAX_OUTPUT_CHARS]
                + "\n\n... Output truncated. Try a narrower search pattern or glob filter."
            )
        return output

    def _collect_files(self, search_path: Path, glob_pattern: str | None) -> list[Path]:
        """Collect files to search.

        Args:
            search_path: File or directory to search.
            glob_pattern: Optional glob pattern to filter files.

        Returns:
            Sorted list of file paths to search (capped at MAX_FILES).
        """
        if search_path.is_file():
            return [search_path]
        pattern = glob_pattern or "*"
        files: list[Path] = []
        for p in search_path.rglob(pattern):
            if not p.is_file():
                continue
            # Skip hidden/build directories
            if any(part in SKIP_DIRS for part in p.relative_to(search_path).parts):
                continue
            files.append(p)
            if len(files) >= MAX_FILES:
                break
        return sorted(files)

    def _is_binary_file(self, file_path: Path) -> bool:
        """Check if a file is binary by looking for null bytes.

        Args:
            file_path: Path to the file.

        Returns:
            True if the file appears to be binary (or cannot be read).
        """
        try:
            with open(file_path, "rb") as f:
                chunk = f.read(BINARY_CHECK_SIZE)
                return b"\x00" in chunk
        except (OSError, PermissionError):
            return True

    def _search_file(
        self,
        file_path: Path,
        compiled_pattern: re.Pattern[str],
        context_lines: int,
    ) -> FileSearchResult | None:
        """Search a single file for matches.

        Args:
            file_path: Path to the file.
            compiled_pattern: Compiled regex pattern.
            context_lines: Number of context lines around matches.

        Returns:
            FileSearchResult if matches found, None otherwise.
        """
        if self._is_binary_file(file_path):
            return None
        try:
            with open(file_path, encoding="utf-8", errors="replace") as f:
                lines = f.readlines()
        except (OSError, PermissionError):
            return None
        # Find matching line numbers
        match_line_nums: list[int] = []
        for i, line in enumerate(lines):
            if compiled_pattern.search(line):
                match_line_nums.append(i)
                if len(match_line_nums) >= MAX_MATCHES_PER_FILE:
                    break
        if not match_line_nums:
            return None
        # Set for O(1) membership tests below; the previous list lookup was
        # O(matches) per line, quadratic for files with many matches.
        matched_set = set(match_line_nums)
        # Build groups of contiguous match blocks with context
        groups: list[list[MatchLine]] = []
        current_group: list[MatchLine] = []
        prev_end = -1
        for match_idx in match_line_nums:
            start = max(0, match_idx - context_lines)
            end = min(len(lines), match_idx + context_lines + 1)
            # If this block doesn't overlap with the previous, start a new group
            if start > prev_end and current_group:
                groups.append(current_group)
                current_group = []
            for i in range(max(start, prev_end), end):
                text = lines[i].rstrip("\n\r")
                if len(text) > MAX_LINE_LENGTH:
                    text = text[:MAX_LINE_LENGTH] + "..."
                current_group.append(
                    MatchLine(
                        line_number=i + 1,  # 1-indexed
                        text=text,
                        is_match=(i in matched_set),
                    )
                )
            prev_end = end
        if current_group:
            groups.append(current_group)
        return FileSearchResult(
            file_path=file_path,
            matches=groups,
            match_count=len(match_line_nums),
        )

    def _format_content(
        self,
        results: list[FileSearchResult],
        include_line_numbers: bool,
    ) -> str:
        """Format results showing matching content.

        Args:
            results: List of file search results.
            include_line_numbers: Whether to include line numbers.

        Returns:
            Formatted string with file paths and matching lines.
        """
        parts: list[str] = []
        for result in results:
            parts.append(str(result.file_path))
            for group_idx, group in enumerate(result.matches):
                if group_idx > 0:
                    parts.append("--")
                for match_line in group:
                    if include_line_numbers:
                        parts.append(f"{match_line.line_number}: {match_line.text}")
                    else:
                        parts.append(match_line.text)
            parts.append("")  # blank line between files
        return "\n".join(parts).rstrip()

    def _format_files_with_matches(self, results: list[FileSearchResult]) -> str:
        """Format results showing only file paths.

        Args:
            results: List of file search results.

        Returns:
            One file path per line.
        """
        return "\n".join(str(r.file_path) for r in results)

    def _format_count(self, results: list[FileSearchResult]) -> str:
        """Format results showing match counts per file.

        Args:
            results: List of file search results.

        Returns:
            Filepath and count per line.
        """
        return "\n".join(f"{r.file_path}: {r.match_count}" for r in results)

View File

@@ -0,0 +1,182 @@
"""Unit tests for GlobTool."""
from pathlib import Path
import pytest
from crewai_tools import GlobTool
@pytest.fixture
def sample_dir(tmp_path: Path) -> Path:
    """Create a temp directory with sample files for testing."""
    # Data-driven layout: relative path -> file content.
    layout = {
        "src/main.py": "print('hello')",
        "src/utils.py": "def helper(): pass",
        "src/components/button.tsx": "export const Button = () => {}",
        "src/components/input.tsx": "export const Input = () => {}",
        "tests/test_main.py": "def test_hello(): pass",
        "tests/test_utils.py": "def test_helper(): pass",
        "config.yaml": "key: value",
        "settings.json": "{}",
        ".hidden": "secret",
    }
    for rel_path, content in layout.items():
        target = tmp_path / rel_path
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(content)
    # Empty directory, to exercise dirs_only matching.
    (tmp_path / "empty_dir").mkdir()
    return tmp_path
class TestGlobTool:
    """Tests for GlobTool."""

    def setup_method(self) -> None:
        """Set up test fixtures."""
        self.tool = GlobTool()

    def test_tool_metadata(self) -> None:
        """Test tool has correct name and description."""
        assert self.tool.name == "glob"
        assert "pattern" in self.tool.description.lower()

    def test_args_schema(self) -> None:
        """Test that args_schema has correct fields."""
        schema = self.tool.args_schema
        fields = schema.model_fields
        assert "pattern" in fields
        assert fields["pattern"].is_required()
        assert "path" in fields
        assert not fields["path"].is_required()
        assert "output_mode" in fields
        assert not fields["output_mode"].is_required()
        assert "include_hidden" in fields
        assert not fields["include_hidden"].is_required()

    def test_find_python_files(self, sample_dir: Path) -> None:
        """Test finding Python files with *.py pattern."""
        result = self.tool._run(pattern="*.py", path=str(sample_dir))
        assert "main.py" in result
        assert "utils.py" in result
        assert "test_main.py" in result
        assert "test_utils.py" in result

    def test_find_specific_extension(self, sample_dir: Path) -> None:
        """Test finding files with specific extension."""
        result = self.tool._run(pattern="*.tsx", path=str(sample_dir))
        assert "button.tsx" in result
        assert "input.tsx" in result
        assert "main.py" not in result

    def test_find_test_files(self, sample_dir: Path) -> None:
        """Test finding test files with test_*.py pattern."""
        result = self.tool._run(pattern="test_*.py", path=str(sample_dir))
        assert "test_main.py" in result
        assert "test_utils.py" in result
        # Verify non-test files are not included (check for exact filename, not substring).
        # Fix: renamed the ambiguous loop variable `l` (ruff/pycodestyle E741).
        lines = result.split("\n")
        file_lines = [line for line in lines if line.endswith(".py")]
        assert not any(
            line.endswith("/main.py") or line == "main.py" for line in file_lines
        )
        assert not any(
            line.endswith("/utils.py") or line == "utils.py" for line in file_lines
        )

    def test_recursive_pattern(self, sample_dir: Path) -> None:
        """Test explicit recursive pattern **/*.py."""
        result = self.tool._run(pattern="**/*.py", path=str(sample_dir))
        assert "main.py" in result
        assert "test_main.py" in result

    def test_hidden_files_excluded_by_default(self, sample_dir: Path) -> None:
        """Test hidden files are excluded by default."""
        result = self.tool._run(pattern="*", path=str(sample_dir))
        assert ".hidden" not in result

    def test_hidden_files_included(self, sample_dir: Path) -> None:
        """Test hidden files included when include_hidden=True."""
        result = self.tool._run(
            pattern=".*", path=str(sample_dir), include_hidden=True
        )
        assert ".hidden" in result

    def test_output_mode_paths(self, sample_dir: Path) -> None:
        """Test paths output mode shows file paths."""
        result = self.tool._run(
            pattern="*.py", path=str(sample_dir), output_mode="paths"
        )
        # Should contain full paths
        assert str(sample_dir) in result or "main.py" in result

    def test_output_mode_detailed(self, sample_dir: Path) -> None:
        """Test detailed output mode shows sizes."""
        result = self.tool._run(
            pattern="*.py", path=str(sample_dir), output_mode="detailed"
        )
        # Should contain size indicators
        assert "B" in result  # Bytes indicator

    def test_output_mode_tree(self, sample_dir: Path) -> None:
        """Test tree output mode shows directory structure."""
        result = self.tool._run(
            pattern="*.py", path=str(sample_dir), output_mode="tree"
        )
        assert str(sample_dir) in result

    def test_dirs_only(self, sample_dir: Path) -> None:
        """Test dirs_only=True only returns directories."""
        result = self.tool._run(
            pattern="*",
            path=str(sample_dir),
            dirs_only=True,
            files_only=False,
        )
        assert "src" in result or "tests" in result or "empty_dir" in result
        # Should not contain file extensions
        assert ".py" not in result
        assert ".yaml" not in result

    def test_path_not_found(self) -> None:
        """Test error message when path doesn't exist."""
        result = self.tool._run(pattern="*.py", path="/nonexistent/path")
        assert "Error" in result
        assert "does not exist" in result

    def test_path_is_file(self, sample_dir: Path) -> None:
        """Test error message when path is a file, not directory."""
        file_path = sample_dir / "config.yaml"
        result = self.tool._run(pattern="*.py", path=str(file_path))
        assert "Error" in result
        assert "not a directory" in result

    def test_no_matches(self, sample_dir: Path) -> None:
        """Test message when no files match pattern."""
        result = self.tool._run(pattern="*.xyz", path=str(sample_dir))
        assert "No files found" in result

    def test_found_count_in_output(self, sample_dir: Path) -> None:
        """Test that result includes count of found files."""
        result = self.tool._run(pattern="*.py", path=str(sample_dir))
        assert "Found" in result
        assert "file(s)" in result

    def test_run_with_kwargs(self, sample_dir: Path) -> None:
        """Test _run ignores extra kwargs."""
        result = self.tool._run(
            pattern="*.py", path=str(sample_dir), extra_arg="ignored"
        )
        assert "main.py" in result

View File

@@ -0,0 +1,243 @@
"""Unit tests for GrepTool."""
from pathlib import Path
import pytest
from crewai_tools import GrepTool
@pytest.fixture
def sample_dir(tmp_path: Path) -> Path:
    """Create a temp directory with sample files for testing.

    Layout (each entry targets a specific GrepTool behavior):
        src/main.py     - two functions and a class (regex targets)
        src/utils.py    - import, helper, and a module constant
        docs/readme.md  - markdown prose
        data/binary.bin - starts with null bytes (binary-skip target)
        empty.txt       - zero-length file
        .git/config     - hidden VCS dir that must not be searched
    """
    # src/main.py
    src = tmp_path / "src"
    src.mkdir()
    (src / "main.py").write_text(
        "def hello():\n"
        " print('Hello, world!')\n"
        "\n"
        "def goodbye():\n"
        " print('Goodbye, world!')\n"
        "\n"
        "class MyClass:\n"
        " pass\n"
    )
    # src/utils.py
    (src / "utils.py").write_text(
        "import os\n"
        "\n"
        "def helper():\n"
        " return os.getcwd()\n"
        "\n"
        "CONSTANT = 42\n"
    )
    # docs/readme.md
    docs = tmp_path / "docs"
    docs.mkdir()
    (docs / "readme.md").write_text(
        "# Project\n"
        "\n"
        "This is a sample project.\n"
        "It has multiple files.\n"
    )
    # data/binary.bin -- null bytes mark it as binary for the tool
    data = tmp_path / "data"
    data.mkdir()
    (data / "binary.bin").write_bytes(b"\x00\x01\x02\x03\x04binary content")
    # empty.txt
    (tmp_path / "empty.txt").write_text("")
    # .git/config (should be skipped)
    git_dir = tmp_path / ".git"
    git_dir.mkdir()
    (git_dir / "config").write_text("[core]\n repositoryformatversion = 0\n")
    return tmp_path
class TestGrepTool:
    """Tests for GrepTool.

    Exercises regex matching, case sensitivity, the three output modes,
    context lines, glob filtering, single-file search, and the skipping of
    binary files and hidden directories, all against the ``sample_dir``
    fixture.
    """
    def setup_method(self) -> None:
        """Set up test fixtures."""
        # Fresh tool instance per test keeps tests independent.
        self.tool = GrepTool()
    def test_tool_metadata(self) -> None:
        """Test tool has correct name and description."""
        assert self.tool.name == "Search file contents"
        assert "search" in self.tool.description.lower() or "Search" in self.tool.description
    def test_args_schema(self) -> None:
        """Test that args_schema has correct fields and defaults."""
        # Pydantic v2 API: model_fields maps field name -> FieldInfo.
        schema = self.tool.args_schema
        fields = schema.model_fields
        assert "pattern" in fields
        assert fields["pattern"].is_required()
        assert "path" in fields
        assert not fields["path"].is_required()
        assert "glob_pattern" in fields
        assert not fields["glob_pattern"].is_required()
        assert "output_mode" in fields
        assert not fields["output_mode"].is_required()
        assert "case_insensitive" in fields
        assert not fields["case_insensitive"].is_required()
        assert "context_lines" in fields
        assert not fields["context_lines"].is_required()
        assert "include_line_numbers" in fields
        assert not fields["include_line_numbers"].is_required()
    def test_basic_pattern_match(self, sample_dir: Path) -> None:
        """Test simple string pattern found in output."""
        result = self.tool._run(pattern="Hello", path=str(sample_dir))
        assert "Hello" in result
    def test_regex_pattern(self, sample_dir: Path) -> None:
        """Test regex pattern matches function definitions."""
        result = self.tool._run(pattern=r"def\s+\w+", path=str(sample_dir))
        assert "def hello" in result
        assert "def goodbye" in result
        assert "def helper" in result
    def test_case_sensitive_default(self, sample_dir: Path) -> None:
        """Test that search is case-sensitive by default."""
        result = self.tool._run(pattern="hello", path=str(sample_dir))
        # "hello" (lowercase) appears in "def hello():" but not in "Hello, world!"
        assert "hello" in result
        # Verify it found the function definition line
        assert "def hello" in result
    def test_case_insensitive(self, sample_dir: Path) -> None:
        """Test case-insensitive matching."""
        result = self.tool._run(
            pattern="hello", path=str(sample_dir), case_insensitive=True
        )
        # Should match both "def hello():" and "Hello, world!"
        assert "hello" in result.lower()
        assert "Hello" in result
    def test_output_mode_content(self, sample_dir: Path) -> None:
        """Test content output mode shows file paths, line numbers, and text."""
        result = self.tool._run(
            pattern="CONSTANT", path=str(sample_dir), output_mode="content"
        )
        assert "utils.py" in result
        assert "CONSTANT" in result
        # Should have line numbers by default
        assert ": " in result
    def test_output_mode_files_with_matches(self, sample_dir: Path) -> None:
        """Test files_with_matches output mode shows only file paths."""
        result = self.tool._run(
            pattern="def", path=str(sample_dir), output_mode="files_with_matches"
        )
        assert "main.py" in result
        assert "utils.py" in result
        # Should not contain line content
        assert "print" not in result
    def test_output_mode_count(self, sample_dir: Path) -> None:
        """Test count output mode shows filepath: N format."""
        result = self.tool._run(
            pattern="def", path=str(sample_dir), output_mode="count"
        )
        # main.py has 2 def lines, utils.py has 1
        assert "main.py: 2" in result
        assert "utils.py: 1" in result
    def test_context_lines(self, sample_dir: Path) -> None:
        """Test surrounding context lines are included."""
        result = self.tool._run(
            pattern="CONSTANT", path=str(sample_dir), context_lines=2
        )
        # Two lines before CONSTANT = 42 is " return os.getcwd()"
        assert "return os.getcwd()" in result
        assert "CONSTANT" in result
    def test_line_numbers_disabled(self, sample_dir: Path) -> None:
        """Test output without line number prefixes."""
        result = self.tool._run(
            pattern="CONSTANT",
            path=str(sample_dir),
            include_line_numbers=False,
        )
        assert "CONSTANT = 42" in result
        # Verify no line number prefix (e.g., "6: ")
        for line in result.strip().split("\n"):
            if "CONSTANT" in line:
                assert not line[0].isdigit() or ": " not in line
    def test_glob_pattern_filtering(self, sample_dir: Path) -> None:
        """Test glob pattern filters to specific file types."""
        result = self.tool._run(
            pattern="project",
            path=str(sample_dir),
            glob_pattern="*.py",
            case_insensitive=True,
        )
        # "project" appears in readme.md but not in .py files
        assert "No matches found" in result
    def test_search_single_file(self, sample_dir: Path) -> None:
        """Test searching a single file by path."""
        file_path = str(sample_dir / "src" / "main.py")
        result = self.tool._run(pattern="def", path=file_path)
        assert "def hello" in result
        assert "def goodbye" in result
        # Should not include results from other files
        assert "helper" not in result
    def test_path_not_found(self) -> None:
        """Test error message when path doesn't exist."""
        result = self.tool._run(pattern="test", path="/nonexistent/path")
        assert "Error" in result
        assert "does not exist" in result
    def test_invalid_regex(self, sample_dir: Path) -> None:
        """Test error message for invalid regex patterns."""
        # "[invalid" is an unterminated character class -> re.error in the tool.
        result = self.tool._run(pattern="[invalid", path=str(sample_dir))
        assert "Error" in result
        assert "Invalid regex" in result
    def test_binary_files_skipped(self, sample_dir: Path) -> None:
        """Test binary files are not included in results."""
        result = self.tool._run(pattern="binary", path=str(sample_dir))
        # binary.bin has null bytes so it should be skipped
        assert "binary.bin" not in result
    def test_no_matches_found(self, sample_dir: Path) -> None:
        """Test message when no matches are found."""
        result = self.tool._run(
            pattern="zzz_nonexistent_pattern_zzz", path=str(sample_dir)
        )
        assert "No matches found" in result
    def test_hidden_dirs_skipped(self, sample_dir: Path) -> None:
        """Test that .git/ directory contents are not searched."""
        result = self.tool._run(pattern="repositoryformatversion", path=str(sample_dir))
        assert "No matches found" in result
    def test_empty_file(self, sample_dir: Path) -> None:
        """Test searching an empty file doesn't crash."""
        result = self.tool._run(
            pattern="anything", path=str(sample_dir / "empty.txt")
        )
        assert "No matches found" in result
    def test_run_with_kwargs(self, sample_dir: Path) -> None:
        """Test _run ignores extra kwargs."""
        result = self.tool._run(
            pattern="Hello", path=str(sample_dir), extra_arg="ignored"
        )
        assert "Hello" in result

View File

@@ -0,0 +1,195 @@
"""Unit tests for FileReadTool."""
from pathlib import Path
import pytest
from crewai_tools.tools.file_read_tool.file_read_tool import FileReadTool
@pytest.fixture
def sample_file(tmp_path: Path) -> Path:
    """Write a 100-line text file where each line states its own number."""
    target = tmp_path / "sample.txt"
    # Build the whole body in one pass; every line ends with a newline.
    content = "".join(f"Line {i}: This is line number {i}.\n" for i in range(1, 101))
    target.write_text(content)
    return target
@pytest.fixture
def binary_file(tmp_path: Path) -> Path:
    """Create a file containing null bytes so it is detected as binary."""
    target = tmp_path / "binary.bin"
    target.write_bytes(b"\x00\x01\x02\x03binary content\x00\x04\x05")
    return target
@pytest.fixture
def empty_file(tmp_path: Path) -> Path:
    """Create a zero-byte text file."""
    target = tmp_path / "empty.txt"
    target.write_text("")
    return target
class TestFileReadTool:
    """Tests for FileReadTool.

    Covers line-numbered reads, offset/limit windows (including negative
    offsets from the end), binary/empty-file handling, error reporting,
    large-file truncation, and backward compatibility with the deprecated
    start_line/line_count parameters.
    """
    def setup_method(self) -> None:
        """Set up test fixtures."""
        # Fresh tool instance per test keeps tests independent.
        self.tool = FileReadTool()
    def test_tool_metadata(self) -> None:
        """Test tool has correct name and description."""
        assert self.tool.name == "read_file"
        assert "read" in self.tool.description.lower()
    def test_args_schema(self) -> None:
        """Test that args_schema has correct fields."""
        # Pydantic v2 API: model_fields maps field name -> FieldInfo.
        schema = self.tool.args_schema
        fields = schema.model_fields
        assert "file_path" in fields
        assert fields["file_path"].is_required()
        assert "offset" in fields
        assert not fields["offset"].is_required()
        assert "limit" in fields
        assert not fields["limit"].is_required()
        assert "include_line_numbers" in fields
        assert not fields["include_line_numbers"].is_required()
    def test_read_entire_file(self, sample_file: Path) -> None:
        """Test reading entire file with line numbers."""
        result = self.tool._run(file_path=str(sample_file))
        assert "File:" in result
        assert "Total lines: 100" in result
        assert "Line 1:" in result
        assert "|" in result  # Line number separator
    def test_read_with_offset(self, sample_file: Path) -> None:
        """Test reading from a specific line offset."""
        result = self.tool._run(file_path=str(sample_file), offset=50, limit=10)
        assert "Showing lines: 50-59" in result
        assert "Line 50:" in result
        assert "Line 59:" in result
        # Should not include lines before offset
        assert "Line 49:" not in result
    def test_negative_offset_reads_from_end(self, sample_file: Path) -> None:
        """Test negative offset reads from end of file."""
        # offset=-10 means "last 10 lines" of the 100-line fixture.
        result = self.tool._run(file_path=str(sample_file), offset=-10)
        assert "Showing lines: 91-100" in result
        assert "Line 91:" in result
        assert "Line 100:" in result
    def test_limit_controls_line_count(self, sample_file: Path) -> None:
        """Test limit parameter controls how many lines are read."""
        result = self.tool._run(file_path=str(sample_file), offset=1, limit=5)
        assert "Showing lines: 1-5" in result
        # Count output lines (excluding header)
        content_lines = [l for l in result.split("\n") if "|" in l and l.strip()]
        assert len(content_lines) == 5
    def test_line_numbers_included_by_default(self, sample_file: Path) -> None:
        """Test line numbers are included by default."""
        result = self.tool._run(file_path=str(sample_file), limit=5)
        # Lines should have format " 1|content"
        assert "|" in result
        for line in result.split("\n"):
            if "Line 1:" in line:
                assert "|" in line
    def test_line_numbers_can_be_disabled(self, sample_file: Path) -> None:
        """Test line numbers can be disabled."""
        result = self.tool._run(
            file_path=str(sample_file), limit=5, include_line_numbers=False
        )
        # Content lines shouldn't have the line number prefix
        content_section = result.split("\n\n", 1)[-1]  # Skip header
        for line in content_section.split("\n"):
            if line.strip() and "Line" in line:
                # Should not start with number|
                assert not line.strip()[0].isdigit() or "|" not in line[:10]
    def test_binary_file_detection(self, binary_file: Path) -> None:
        """Test binary files are detected and not read as text."""
        result = self.tool._run(file_path=str(binary_file))
        assert "Error" in result
        assert "binary" in result.lower()
    def test_empty_file(self, empty_file: Path) -> None:
        """Test reading empty file returns appropriate message."""
        result = self.tool._run(file_path=str(empty_file))
        assert "Total lines: 0" in result
        assert "Empty file" in result
    def test_file_not_found(self) -> None:
        """Test error message when file doesn't exist."""
        result = self.tool._run(file_path="/nonexistent/file.txt")
        assert "Error" in result
        assert "not found" in result.lower()
    def test_directory_path_error(self, tmp_path: Path) -> None:
        """Test error when path is a directory."""
        result = self.tool._run(file_path=str(tmp_path))
        assert "Error" in result
        assert "directory" in result.lower()
    def test_file_metadata_in_header(self, sample_file: Path) -> None:
        """Test file metadata is included in response header."""
        result = self.tool._run(file_path=str(sample_file), limit=10)
        # Should have file path
        assert str(sample_file) in result
        # Should have total lines
        assert "Total lines:" in result
    def test_large_file_auto_truncation(self, tmp_path: Path) -> None:
        """Test large files are automatically truncated."""
        # Create a file with 1000 lines
        large_file = tmp_path / "large.txt"
        lines = [f"Line {i}" for i in range(1, 1001)]
        large_file.write_text("\n".join(lines))
        result = self.tool._run(file_path=str(large_file))
        # Should be truncated and include message about it
        assert "truncated" in result.lower() or "Showing lines" in result
        # Should not read all 1000 lines without explicit limit
        assert "Line 1000" not in result or "limit" in result.lower()
    def test_legacy_start_line_parameter(self, sample_file: Path) -> None:
        """Test backward compatibility with start_line parameter."""
        # start_line/line_count are deprecated aliases for offset/limit.
        result = self.tool._run(file_path=str(sample_file), start_line=10, line_count=5)
        assert "Showing lines: 10-14" in result
        assert "Line 10:" in result
    def test_constructor_with_file_path(self, sample_file: Path) -> None:
        """Test constructing tool with default file path."""
        tool = FileReadTool(file_path=str(sample_file))
        result = tool._run()
        assert "Line 1:" in result
    def test_constructor_file_path_override(self, sample_file: Path, tmp_path: Path) -> None:
        """Test runtime file_path overrides constructor file_path."""
        other_file = tmp_path / "other.txt"
        other_file.write_text("Different content\n")
        tool = FileReadTool(file_path=str(sample_file))
        result = tool._run(file_path=str(other_file))
        assert "Different content" in result
        assert "Line 1:" not in result
    def test_no_file_path_error(self) -> None:
        """Test error when no file path is provided."""
        result = self.tool._run()
        assert "Error" in result
        assert "No file path" in result
    def test_offset_beyond_file_length(self, sample_file: Path) -> None:
        """Test offset beyond file length returns empty content."""
        result = self.tool._run(file_path=str(sample_file), offset=200)
        # File has 100 lines, offset 200 should show nothing
        # But header should still show file info
        assert "Total lines: 100" in result
View File

@@ -6697,9 +6697,9 @@
}
},
{
"description": "A tool that reads the content of a file. To use this tool, provide a 'file_path' parameter with the path to the file you want to read. Optionally, provide 'start_line' to start reading from a specific line and 'line_count' to limit the number of lines read.",
"description": "Read content from a file on disk. Returns file content with line numbers prefixed (format: 'LINE_NUMBER|CONTENT'). Use 'offset' to start from a specific line (negative values read from end), and 'limit' to control how many lines to read. For large files, reads are automatically limited.",
"env_vars": [],
"humanized_name": "Read a file's content",
"humanized_name": "read_file",
"init_params_schema": {
"$defs": {
"EnvVar": {
@@ -6738,7 +6738,7 @@
"type": "object"
}
},
"description": "A tool for reading file contents.\n\nThis tool inherits its schema handling from BaseTool to avoid recursive schema\ndefinition issues. The args_schema is set to FileReadToolSchema which defines\nthe required file_path parameter. The schema should not be overridden in the\nconstructor as it would break the inheritance chain and cause infinite loops.\n\nThe tool supports two ways of specifying the file path:\n1. At construction time via the file_path parameter\n2. At runtime via the file_path parameter in the tool's input\n\nArgs:\n file_path (Optional[str]): Path to the file to be read. If provided,\n this becomes the default file path for the tool.\n **kwargs: Additional keyword arguments passed to BaseTool.\n\nExample:\n >>> tool = FileReadTool(file_path=\"/path/to/file.txt\")\n >>> content = tool.run() # Reads /path/to/file.txt\n >>> content = tool.run(file_path=\"/path/to/other.txt\") # Reads other.txt\n >>> content = tool.run(\n ... file_path=\"/path/to/file.txt\", start_line=100, line_count=50\n ... ) # Reads lines 100-149",
"description": "A tool for reading file contents with line number support.\n\nThis tool provides Claude Code-like file reading capabilities:\n- Line number prefixes for easy reference\n- Offset/limit support for reading specific portions of large files\n- Negative offset support for reading from end of file\n- Binary file detection\n- File metadata (total lines) in response header\n\nThe tool supports two ways of specifying the file path:\n1. At construction time via the file_path parameter\n2. At runtime via the file_path parameter in the tool's input\n\nArgs:\n file_path (Optional[str]): Path to the file to be read. If provided,\n this becomes the default file path for the tool.\n **kwargs: Additional keyword arguments passed to BaseTool.\n\nExample:\n >>> tool = FileReadTool()\n >>> content = tool.run(file_path=\"/path/to/file.txt\") # Reads entire file\n >>> content = tool.run(\n ... file_path=\"/path/to/file.txt\", offset=100, limit=50\n ... ) # Lines 100-149\n >>> content = tool.run(\n ... file_path=\"/path/to/file.txt\", offset=-20\n ... ) # Last 20 lines",
"properties": {
"file_path": {
"anyOf": [
@@ -6766,6 +6766,25 @@
"title": "File Path",
"type": "string"
},
"include_line_numbers": {
"default": true,
"description": "Whether to prefix each line with its line number (format: 'LINE_NUMBER|CONTENT')",
"title": "Include Line Numbers",
"type": "boolean"
},
"limit": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"default": null,
"description": "Maximum number of lines to read. If None, reads up to the default limit (500 lines) for large files, or entire file for small files.",
"title": "Limit"
},
"line_count": {
"anyOf": [
{
@@ -6776,9 +6795,22 @@
}
],
"default": null,
"description": "Number of lines to read. If None, reads the entire file",
"description": "[DEPRECATED: Use 'limit' instead] Number of lines to read.",
"title": "Line Count"
},
"offset": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"default": null,
"description": "Line number to start reading from. Positive values are 1-indexed from the start. Negative values count from the end (e.g., -10 reads last 10 lines). If None, reads from the beginning.",
"title": "Offset"
},
"start_line": {
"anyOf": [
{
@@ -6788,8 +6820,8 @@
"type": "null"
}
],
"default": 1,
"description": "Line number to start reading from (1-indexed)",
"default": null,
"description": "[DEPRECATED: Use 'offset' instead] Line number to start reading from (1-indexed).",
"title": "Start Line"
}
},
@@ -8360,6 +8392,233 @@
"type": "object"
}
},
{
"description": "Find files matching a glob pattern. Use to discover files by name or extension. Examples: '*.py' finds all Python files, '**/*.yaml' finds YAML files recursively, 'test_*.py' finds test files. Returns matching file paths sorted by modification time.",
"env_vars": [],
"humanized_name": "glob",
"init_params_schema": {
"$defs": {
"EnvVar": {
"properties": {
"default": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Default"
},
"description": {
"title": "Description",
"type": "string"
},
"name": {
"title": "Name",
"type": "string"
},
"required": {
"default": true,
"title": "Required",
"type": "boolean"
}
},
"required": [
"name",
"description"
],
"title": "EnvVar",
"type": "object"
}
},
"description": "Tool for finding files matching glob patterns.\n\nRecursively searches for files matching a glob pattern within a directory.\nUseful for discovering files by name, extension, or path pattern.\nComplements GrepTool which searches by file content.\n\nExample:\n >>> tool = GlobTool()\n >>> result = tool.run(pattern=\"*.py\", path=\"/path/to/project\")\n >>> result = tool.run(pattern=\"**/*.yaml\", output_mode=\"detailed\")",
"properties": {},
"title": "GlobTool",
"type": "object"
},
"name": "GlobTool",
"package_dependencies": [],
"run_params_schema": {
"description": "Schema for glob tool arguments.",
"properties": {
"dirs_only": {
"default": false,
"description": "If True, only match directories, not files.",
"title": "Dirs Only",
"type": "boolean"
},
"files_only": {
"default": true,
"description": "If True (default), only match files, not directories.",
"title": "Files Only",
"type": "boolean"
},
"include_hidden": {
"default": false,
"description": "Whether to include hidden files and directories (starting with '.').",
"title": "Include Hidden",
"type": "boolean"
},
"output_mode": {
"default": "paths",
"description": "Output format: 'paths' shows file paths one per line, 'tree' shows directory tree structure, 'detailed' includes file sizes.",
"enum": [
"paths",
"tree",
"detailed"
],
"title": "Output Mode",
"type": "string"
},
"path": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Directory to search in. Defaults to current working directory.",
"title": "Path"
},
"pattern": {
"description": "Glob pattern to match files. Examples: '*.py' (Python files), '**/*.yaml' (all YAML files recursively), 'src/**/*.ts' (TypeScript in src), 'test_*.py' (test files). Patterns not starting with '**/' are auto-prefixed for recursive search.",
"title": "Pattern",
"type": "string"
}
},
"required": [
"pattern"
],
"title": "GlobToolSchema",
"type": "object"
}
},
{
"description": "A tool that searches file contents on disk using regex patterns. Recursively searches files in a directory for matching lines. Returns matching content with line numbers, file paths only, or match counts.",
"env_vars": [],
"humanized_name": "Search file contents",
"init_params_schema": {
"$defs": {
"EnvVar": {
"properties": {
"default": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Default"
},
"description": {
"title": "Description",
"type": "string"
},
"name": {
"title": "Name",
"type": "string"
},
"required": {
"default": true,
"title": "Required",
"type": "boolean"
}
},
"required": [
"name",
"description"
],
"title": "EnvVar",
"type": "object"
}
},
"description": "Tool for searching file contents on disk using regex patterns.\n\nRecursively searches files in a directory for lines matching a regex pattern.\nSupports glob filtering, context lines, and multiple output modes.\n\nExample:\n >>> tool = GrepTool()\n >>> result = tool.run(pattern=\"def.*main\", path=\"/path/to/project\")\n >>> result = tool.run(\n ... pattern=\"TODO\",\n ... path=\"/path/to/project\",\n ... glob_pattern=\"*.py\",\n ... context_lines=2,\n ... )",
"properties": {},
"title": "GrepTool",
"type": "object"
},
"name": "GrepTool",
"package_dependencies": [],
"run_params_schema": {
"description": "Schema for grep tool arguments.",
"properties": {
"case_insensitive": {
"default": false,
"description": "Whether to perform case-insensitive matching",
"title": "Case Insensitive",
"type": "boolean"
},
"context_lines": {
"default": 0,
"description": "Number of lines to show before and after each match",
"title": "Context Lines",
"type": "integer"
},
"glob_pattern": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Glob pattern to filter files (e.g. '*.py', '*.{ts,tsx}')",
"title": "Glob Pattern"
},
"include_line_numbers": {
"default": true,
"description": "Whether to prefix matching lines with line numbers",
"title": "Include Line Numbers",
"type": "boolean"
},
"output_mode": {
"default": "content",
"description": "Output mode: 'content' shows matching lines, 'files_with_matches' shows only file paths, 'count' shows match counts per file",
"enum": [
"content",
"files_with_matches",
"count"
],
"title": "Output Mode",
"type": "string"
},
"path": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "File or directory to search in. Defaults to current working directory.",
"title": "Path"
},
"pattern": {
"description": "Regex pattern to search for in file contents",
"title": "Pattern",
"type": "string"
}
},
"required": [
"pattern"
],
"title": "GrepToolSchema",
"type": "object"
}
},
{
"description": "Scrape or crawl a website using Hyperbrowser and return the contents in properly formatted markdown or html",
"env_vars": [

View File

@@ -0,0 +1,230 @@
"""Unit tests for GlobTool."""
from pathlib import Path
import pytest
from crewai.tools.agent_tools.glob_tool import GlobTool
@pytest.fixture
def sample_dir(tmp_path: Path) -> Path:
    """Create a temp directory with sample files for testing.

    Layout (each entry targets a specific GlobTool behavior):
        src/{main.py,utils.py,config.yaml}      - basic extension matches
        src/components/{*.tsx,index.ts}         - nested/recursive matches
        tests/test_*.py                         - prefix-pattern matches
        docs/*.md                               - markdown matches
        data/binary.bin, empty.txt              - misc files
        .hidden, .hidden_dir/, .git/            - hidden entries (skipped)
        node_modules/                           - dependency dir (skipped)
    """
    # src/main.py
    src = tmp_path / "src"
    src.mkdir()
    (src / "main.py").write_text("def main(): pass\n")
    (src / "utils.py").write_text("def helper(): pass\n")
    (src / "config.yaml").write_text("key: value\n")
    # src/components/
    components = src / "components"
    components.mkdir()
    (components / "button.tsx").write_text("export const Button = () => {};\n")
    (components / "input.tsx").write_text("export const Input = () => {};\n")
    (components / "index.ts").write_text("export * from './button';\n")
    # tests/
    tests = tmp_path / "tests"
    tests.mkdir()
    (tests / "test_main.py").write_text("def test_main(): pass\n")
    (tests / "test_utils.py").write_text("def test_utils(): pass\n")
    # docs/
    docs = tmp_path / "docs"
    docs.mkdir()
    (docs / "readme.md").write_text("# Project\n")
    (docs / "api.md").write_text("# API\n")
    # data/binary.bin
    data = tmp_path / "data"
    data.mkdir()
    (data / "binary.bin").write_bytes(b"\x00\x01\x02\x03binary content")
    # empty.txt
    (tmp_path / "empty.txt").write_text("")
    # Hidden files (should be skipped by default)
    (tmp_path / ".hidden").write_text("hidden content\n")
    hidden_dir = tmp_path / ".hidden_dir"
    hidden_dir.mkdir()
    (hidden_dir / "secret.txt").write_text("secret\n")
    # .git/config (should be skipped)
    git_dir = tmp_path / ".git"
    git_dir.mkdir()
    (git_dir / "config").write_text("[core]\n repositoryformatversion = 0\n")
    # node_modules (should be skipped)
    node_modules = tmp_path / "node_modules"
    node_modules.mkdir()
    (node_modules / "package.json").write_text('{"name": "test"}\n')
    return tmp_path
class TestGlobTool:
    """Tests for GlobTool.

    Covers pattern matching (plain, recursive, directory-scoped), the three
    output modes, hidden/ignored-directory handling, files-vs-dirs filtering,
    and error reporting against the ``sample_dir`` fixture.
    """
    def setup_method(self) -> None:
        """Set up test fixtures."""
        # Fresh tool instance per test keeps tests independent.
        self.tool = GlobTool()
    def test_tool_metadata(self) -> None:
        """Test tool has correct name and description."""
        assert self.tool.name == "glob"
        assert "find" in self.tool.description.lower() or "pattern" in self.tool.description.lower()
    def test_args_schema(self) -> None:
        """Test that args_schema has correct fields and defaults."""
        # Pydantic v2 API: model_fields maps field name -> FieldInfo.
        schema = self.tool.args_schema
        fields = schema.model_fields
        assert "pattern" in fields
        assert fields["pattern"].is_required()
        assert "path" in fields
        assert not fields["path"].is_required()
        assert "output_mode" in fields
        assert not fields["output_mode"].is_required()
        assert "include_hidden" in fields
        assert not fields["include_hidden"].is_required()
    def test_basic_pattern_match(self, sample_dir: Path) -> None:
        """Test simple glob pattern finds files."""
        result = self.tool._run(pattern="*.py", path=str(sample_dir))
        assert "main.py" in result
        assert "utils.py" in result
        assert "test_main.py" in result
        assert "test_utils.py" in result
    def test_recursive_pattern(self, sample_dir: Path) -> None:
        """Test recursive glob pattern with **."""
        result = self.tool._run(pattern="**/*.tsx", path=str(sample_dir))
        assert "button.tsx" in result
        assert "input.tsx" in result
    def test_auto_recursive_prefix(self, sample_dir: Path) -> None:
        """Test that patterns without ** are auto-prefixed for recursive search."""
        # config.yaml lives in src/, so a bare "*.yaml" only matches if the
        # tool rewrites it to "**/*.yaml".
        result = self.tool._run(pattern="*.yaml", path=str(sample_dir))
        assert "config.yaml" in result
    def test_specific_directory_pattern(self, sample_dir: Path) -> None:
        """Test pattern targeting specific directory."""
        result = self.tool._run(pattern="src/**/*.py", path=str(sample_dir))
        assert "main.py" in result
        assert "utils.py" in result
        # Should not include test files
        assert "test_main.py" not in result
    def test_output_mode_paths(self, sample_dir: Path) -> None:
        """Test paths output mode shows full file paths."""
        result = self.tool._run(pattern="*.md", path=str(sample_dir), output_mode="paths")
        assert "readme.md" in result
        assert "api.md" in result
    def test_output_mode_detailed(self, sample_dir: Path) -> None:
        """Test detailed output mode includes file sizes."""
        result = self.tool._run(pattern="*.md", path=str(sample_dir), output_mode="detailed")
        assert "readme.md" in result
        # Should have size information
        assert "B" in result  # Bytes unit
    def test_output_mode_tree(self, sample_dir: Path) -> None:
        """Test tree output mode shows directory structure."""
        result = self.tool._run(pattern="*.py", path=str(sample_dir), output_mode="tree")
        assert "src/" in result or "src" in result
        assert "tests/" in result or "tests" in result
    def test_hidden_files_excluded_by_default(self, sample_dir: Path) -> None:
        """Test hidden files are not included by default."""
        result = self.tool._run(pattern="*", path=str(sample_dir))
        assert ".hidden" not in result
        assert "secret.txt" not in result
    def test_hidden_files_included_when_requested(self, sample_dir: Path) -> None:
        """Test hidden files are included when include_hidden=True."""
        result = self.tool._run(pattern="*", path=str(sample_dir), include_hidden=True)
        assert ".hidden" in result
    def test_git_directory_skipped(self, sample_dir: Path) -> None:
        """Test .git directory contents are not included."""
        result = self.tool._run(pattern="*", path=str(sample_dir), include_hidden=True)
        # Even with include_hidden, .git should be skipped
        # The .git directory itself might show but not its contents
        assert "config" not in result or ".git" not in result.split("config")[0].split("\n")[-1]
    def test_node_modules_skipped(self, sample_dir: Path) -> None:
        """Test node_modules directory contents are not included."""
        result = self.tool._run(pattern="*.json", path=str(sample_dir))
        assert "package.json" not in result
    def test_path_not_found(self) -> None:
        """Test error message when path doesn't exist."""
        result = self.tool._run(pattern="*.py", path="/nonexistent/path")
        assert "Error" in result
        assert "does not exist" in result
    def test_path_is_not_directory(self, sample_dir: Path) -> None:
        """Test error message when path is a file, not directory."""
        file_path = str(sample_dir / "empty.txt")
        result = self.tool._run(pattern="*.py", path=file_path)
        assert "Error" in result
        assert "not a directory" in result
    def test_no_matches_found(self, sample_dir: Path) -> None:
        """Test message when no files match pattern."""
        result = self.tool._run(pattern="*.nonexistent", path=str(sample_dir))
        assert "No files found" in result
    def test_files_only_default(self, sample_dir: Path) -> None:
        """Test that only files are matched by default (not directories)."""
        result = self.tool._run(pattern="*", path=str(sample_dir))
        # Should have files
        assert ".txt" in result or ".py" in result
        # Directories shouldn't have trailing slash in paths mode
        lines = [l for l in result.split("\n") if "src/" in l and l.strip().endswith("/")]
        # Should not list src/ as a match (it's a directory)
        assert len(lines) == 0 or "tree" in result.lower()
    def test_dirs_only(self, sample_dir: Path) -> None:
        """Test dirs_only flag matches only directories."""
        result = self.tool._run(
            pattern="*", path=str(sample_dir), dirs_only=True, files_only=False
        )
        assert "src" in result
        assert "tests" in result
        assert "docs" in result
        # Should not include files
        assert ".py" not in result
        assert ".txt" not in result
    def test_match_count_summary(self, sample_dir: Path) -> None:
        """Test that result includes count of matched files."""
        result = self.tool._run(pattern="*.py", path=str(sample_dir))
        assert "Found" in result
        assert "file" in result.lower()
    def test_run_with_kwargs(self, sample_dir: Path) -> None:
        """Test _run ignores extra kwargs."""
        result = self.tool._run(
            pattern="*.py", path=str(sample_dir), extra_arg="ignored"
        )
        assert "main.py" in result
    def test_test_file_pattern(self, sample_dir: Path) -> None:
        """Test finding test files with test_*.py pattern."""
        result = self.tool._run(pattern="test_*.py", path=str(sample_dir))
        assert "test_main.py" in result
        assert "test_utils.py" in result
        # NOTE(review): "main.py" is a substring of "test_main.py", so this
        # disjunction is always satisfied when test_main.py matched above --
        # it does not actually prove src/main.py was excluded.
        # Should not include non-test files
        assert "main.py" not in result or "test_main.py" in result
    def test_typescript_files(self, sample_dir: Path) -> None:
        """Test finding TypeScript files with combined pattern."""
        result = self.tool._run(pattern="*.ts", path=str(sample_dir))
        assert "index.ts" in result
        # .tsx files should not match *.ts
        assert "button.tsx" not in result