Compare commits

...

19 Commits

Author SHA1 Message Date
github-actions[bot]
2c78e60f56 chore: update tool specifications 2026-03-10 17:32:23 +00:00
Lorenze Jay
8e336a476f Merge branch 'main' into lorenze/feat/grep-tool 2026-03-10 10:31:04 -07:00
github-actions[bot]
2d0e81c10d chore: update tool specifications 2026-02-17 22:35:58 +00:00
Lorenze Jay
c8dd6c006c Merge branch 'main' into lorenze/feat/grep-tool 2026-02-17 14:34:36 -08:00
lorenzejay
73f44c878d Merge branch 'lorenze/feat/grep-tool' of github.com:crewAIInc/crewAI into lorenze/feat/grep-tool 2026-02-12 10:29:58 -08:00
lorenzejay
364143a682 fix test 2026-02-12 10:29:46 -08:00
github-actions[bot]
f894d8cf9d chore: update tool specifications 2026-02-12 18:29:36 +00:00
lorenzejay
1f0265781a Merge branch 'lorenze/feat/grep-tool' of github.com:crewAIInc/crewAI into lorenze/feat/grep-tool 2026-02-12 10:28:16 -08:00
lorenzejay
9fae6c0adf feat: enhance GrepTool with sensitive file exclusion and file size limit
- Added MAX_CONTEXT_LINES to define the upper limit for context lines shown in search results.
- Introduced MAX_FILE_SIZE_BYTES to skip files larger than 10 MB during searches.
- Implemented logic to exclude sensitive files (e.g., .env, .netrc) from search results to prevent accidental leakage of credentials.
- Updated tests to validate sensitive file exclusion and file size limits, ensuring robustness in handling sensitive content.
2026-02-12 10:27:24 -08:00
Lorenze Jay
dea2e1e715 Merge branch 'main' into lorenze/feat/grep-tool 2026-02-12 09:24:15 -08:00
github-actions[bot]
b97fc83656 chore: update tool specifications 2026-02-12 04:47:03 +00:00
lorenzejay
925ed7850e linted 2026-02-11 20:45:40 -08:00
lorenzejay
ec2b6a0287 feat: enhance GrepTool with regex length limit, path restrictions, and brace expansion support
- Added MAX_REGEX_LENGTH to limit regex pattern length and prevent ReDoS.
- Introduced allow_unrestricted_paths option to enable searching outside the current working directory.
- Implemented brace expansion for glob patterns to support multiple file types.
- Enhanced error handling for path traversal and regex compilation.
- Updated tests to cover new features and ensure robustness.
2026-02-11 20:44:46 -08:00
Lorenze Jay
25835ca795 Merge branch 'main' into lorenze/feat/grep-tool 2026-02-11 14:23:35 -08:00
Lorenze Jay
e65940816b Merge branch 'main' into lorenze/feat/grep-tool 2026-02-09 11:28:49 -08:00
Lorenze Jay
ad2435f5c1 Merge branch 'main' into lorenze/feat/grep-tool 2026-02-05 12:02:33 -08:00
github-actions[bot]
c9971a7418 chore: update tool specifications 2026-02-04 19:52:01 +00:00
lorenzejay
f04bedc9ab moved to tools 2026-02-04 11:50:43 -08:00
Lorenze Jay
5a14007511 native support for grep 2026-02-04 10:28:35 -08:00
6 changed files with 1134 additions and 61 deletions

View File

@@ -88,6 +88,7 @@ from crewai_tools.tools.generate_crewai_automation_tool.generate_crewai_automati
GenerateCrewaiAutomationTool,
)
from crewai_tools.tools.github_search_tool.github_search_tool import GithubSearchTool
from crewai_tools.tools.grep_tool.grep_tool import GrepTool
from crewai_tools.tools.hyperbrowser_load_tool.hyperbrowser_load_tool import (
HyperbrowserLoadTool,
)
@@ -248,6 +249,7 @@ __all__ = [
"FirecrawlSearchTool",
"GenerateCrewaiAutomationTool",
"GithubSearchTool",
"GrepTool",
"HyperbrowserLoadTool",
"InvokeCrewAIAutomationTool",
"JSONSearchTool",

View File

@@ -77,6 +77,7 @@ from crewai_tools.tools.generate_crewai_automation_tool.generate_crewai_automati
GenerateCrewaiAutomationTool,
)
from crewai_tools.tools.github_search_tool.github_search_tool import GithubSearchTool
from crewai_tools.tools.grep_tool.grep_tool import GrepTool
from crewai_tools.tools.hyperbrowser_load_tool.hyperbrowser_load_tool import (
HyperbrowserLoadTool,
)
@@ -232,6 +233,7 @@ __all__ = [
"FirecrawlSearchTool",
"GenerateCrewaiAutomationTool",
"GithubSearchTool",
"GrepTool",
"HyperbrowserLoadTool",
"InvokeCrewAIAutomationTool",
"JSONSearchTool",

View File

@@ -0,0 +1,3 @@
from crewai_tools.tools.grep_tool.grep_tool import GrepTool
__all__ = ["GrepTool"]

View File

@@ -0,0 +1,542 @@
"""Tool for searching file contents on disk using regex patterns."""
from __future__ import annotations
from dataclasses import dataclass, field
from itertools import chain
import os
from pathlib import Path
import re
import signal
import sys
from typing import Literal
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
# Output and resource limits that bound a single search invocation.
MAX_OUTPUT_CHARS = 50_000  # total characters returned before truncation
MAX_FILES = 10_000  # maximum number of files collected per search
MAX_MATCHES_PER_FILE = 200  # matches recorded per file before stopping early
MAX_LINE_LENGTH = 500  # characters of a line shown before "..." truncation
BINARY_CHECK_SIZE = 8192  # bytes sniffed for null bytes in binary detection
MAX_REGEX_LENGTH = 1_000  # pattern length guard against ReDoS
REGEX_MATCH_TIMEOUT_SECONDS = 5  # per-line SIGALRM timeout (Unix only)
MAX_CONTEXT_LINES = 10  # upper bound accepted for context_lines
MAX_FILE_SIZE_BYTES = 10 * 1024 * 1024  # 10 MB

# Directory names that are never descended into (VCS metadata, caches,
# virtual environments).
SKIP_DIRS = frozenset(
    {
        ".git",
        "__pycache__",
        "node_modules",
        ".venv",
        "venv",
        ".tox",
        ".mypy_cache",
        ".pytest_cache",
    }
)

# File names that may contain secrets or credentials — always excluded from
# search results to prevent accidental sensitive-content leakage. Entries
# containing "/" (e.g. ".aws/credentials") are also matched against trailing
# path components — see GrepTool._is_sensitive_file.
SENSITIVE_FILE_NAMES = frozenset(
    {
        ".env",
        ".env.local",
        ".env.development",
        ".env.production",
        ".env.staging",
        ".env.test",
        ".netrc",
        ".npmrc",
        ".pypirc",
        ".docker/config.json",
        ".aws/credentials",
        ".ssh/id_rsa",
        ".ssh/id_ed25519",
        ".ssh/id_ecdsa",
        ".ssh/id_dsa",
        "credentials.json",
        "service-account.json",
        "secrets.yaml",
        "secrets.yml",
        "secrets.json",
    }
)

# Key/certificate extensions that indicate sensitive content; matched with
# str.endswith against the full file name (e.g. "server.pem" matches,
# "pem.txt" does not).
SENSITIVE_FILE_PATTERNS = (
    ".pem",
    ".key",
    ".p12",
    ".pfx",
    ".jks",
    ".keystore",
)
@dataclass
class MatchLine:
    """A single line from a search result."""

    line_number: int  # 1-indexed line number within the file
    text: str  # line content (truncated to MAX_LINE_LENGTH in _search_file)
    is_match: bool  # True for match, False for context line
@dataclass
class FileSearchResult:
    """Search results for a single file."""

    file_path: Path  # path of the searched file
    # Groups of contiguous match/context lines ("--"-separated in output).
    matches: list[list[MatchLine]] = field(default_factory=list)
    match_count: int = 0  # number of matching lines (capped at MAX_MATCHES_PER_FILE)
class GrepToolSchema(BaseModel):
    """Schema for grep tool arguments.

    Validation (required pattern, context_lines bounds) is enforced by
    Pydantic when the tool is invoked through its public run interface.
    """

    pattern: str = Field(
        ..., description="Regex pattern to search for in file contents"
    )
    path: str | None = Field(
        default=None,
        description="File or directory to search in. Defaults to current working directory.",
    )
    glob_pattern: str | None = Field(
        default=None,
        description="Glob pattern to filter files (e.g. '*.py'). Supports brace expansion (e.g. '*.{ts,tsx}').",
    )
    output_mode: Literal["content", "files_with_matches", "count"] = Field(
        default="content",
        description="Output mode: 'content' shows matching lines, 'files_with_matches' shows only file paths, 'count' shows match counts per file",
    )
    case_insensitive: bool = Field(
        default=False,
        description="Whether to perform case-insensitive matching",
    )
    # ge/le bounds reject negative or excessively large context windows.
    context_lines: int = Field(
        default=0,
        ge=0,
        le=MAX_CONTEXT_LINES,
        description=f"Number of lines to show before and after each match (0-{MAX_CONTEXT_LINES})",
    )
    include_line_numbers: bool = Field(
        default=True,
        description="Whether to prefix matching lines with line numbers",
    )
class GrepTool(BaseTool):
    """Tool for searching file contents on disk using regex patterns.

    Recursively searches files in a directory for lines matching a regex pattern.
    Supports glob filtering, context lines, and multiple output modes.

    Example:
        >>> tool = GrepTool()
        >>> result = tool.run(pattern="def.*main", path="src")
        >>> result = tool.run(
        ...     pattern="TODO",
        ...     glob_pattern="*.py",
        ...     context_lines=2,
        ... )

    To search any path on the filesystem (opt-in):
        >>> tool = GrepTool(allow_unrestricted_paths=True)
        >>> result = tool.run(pattern="error", path="/var/log/app")
    """

    name: str = "Search file contents"
    description: str = (
        "A tool that searches file contents on disk using regex patterns. "
        "Recursively searches files in a directory for matching lines. "
        "Returns matching content with line numbers, file paths only, or match counts."
    )
    args_schema: type[BaseModel] = GrepToolSchema
    # Opt-in escape hatch for the cwd sandbox enforced in _run().
    allow_unrestricted_paths: bool = Field(
        default=False,
        description=(
            "When False (default), searches are restricted to the current working "
            "directory. Set to True to allow searching any path on the filesystem."
        ),
    )
    # Per-instance override of the module-level MAX_FILE_SIZE_BYTES default.
    max_file_size_bytes: int = Field(
        default=MAX_FILE_SIZE_BYTES,
        description=(
            "Maximum file size in bytes to search. Files larger than this are "
            "skipped. Defaults to 10 MB."
        ),
    )
def _run(
    self,
    pattern: str,
    path: str | None = None,
    glob_pattern: str | None = None,
    output_mode: Literal["content", "files_with_matches", "count"] = "content",
    case_insensitive: bool = False,
    context_lines: int = 0,
    include_line_numbers: bool = True,
    **kwargs: object,
) -> str:
    """Search files for a regex pattern.

    Args:
        pattern: Regex pattern to search for.
        path: File or directory to search. Defaults to cwd.
        glob_pattern: Glob pattern to filter files.
        output_mode: What to return.
        case_insensitive: Case-insensitive matching.
        context_lines: Lines of context around matches.
        include_line_numbers: Prefix lines with line numbers.

    Returns:
        Formatted search results as a string. All failure cases (bad path,
        bad regex, over-long pattern) are reported as "Error: ..." strings
        rather than raised, so the LLM caller always gets readable output.
    """
    # Resolve search path — constrained to cwd unless unrestricted
    cwd = Path(os.getcwd()).resolve()
    if path:
        candidate = Path(path)
        if candidate.is_absolute():
            search_path = candidate.resolve()
        else:
            # Relative paths are resolved against cwd, which also collapses
            # any "../" segments before the containment check below.
            search_path = (cwd / candidate).resolve()
        # Prevent traversal outside the working directory (unless opted in)
        if not self.allow_unrestricted_paths:
            try:
                search_path.relative_to(cwd)
            except ValueError:
                return (
                    f"Error: Path '{path}' is outside the working directory. "
                    "Initialize with GrepTool(allow_unrestricted_paths=True) to allow this."
                )
    else:
        search_path = cwd
    if not search_path.exists():
        return f"Error: Path '{search_path}' does not exist."
    # Compile regex with length guard to mitigate ReDoS
    if len(pattern) > MAX_REGEX_LENGTH:
        return f"Error: Pattern too long ({len(pattern)} chars). Maximum is {MAX_REGEX_LENGTH}."
    flags = re.IGNORECASE if case_insensitive else 0
    try:
        compiled = re.compile(pattern, flags)
    except re.error as e:
        return f"Error: Invalid regex pattern '{pattern}': {e}"
    # Collect files
    files = self._collect_files(search_path, glob_pattern)
    # Search each file
    results: list[FileSearchResult] = []
    for file_path in files:
        result = self._search_file(file_path, compiled, context_lines)
        if result is not None:
            results.append(result)
    if not results:
        return "No matches found."
    # Format output
    if output_mode == "files_with_matches":
        output = self._format_files_with_matches(results)
    elif output_mode == "count":
        output = self._format_count(results)
    else:
        output = self._format_content(results, include_line_numbers)
    # Truncate if needed
    if len(output) > MAX_OUTPUT_CHARS:
        output = (
            output[:MAX_OUTPUT_CHARS]
            + "\n\n... Output truncated. Try a narrower search pattern or glob filter."
        )
    return output
@staticmethod
def _expand_brace_pattern(pattern: str) -> list[str]:
    """Expand a single-level brace group into separate glob patterns.

    ``*.{py,txt}`` becomes ``['*.py', '*.txt']``. Only one non-nested
    ``{a,b,...}`` group is expanded; whitespace around alternatives is
    stripped. Nested braces are *not* supported and the pattern is
    returned as-is.

    Args:
        pattern: Glob pattern that may contain ``{a,b,...}`` syntax.

    Returns:
        List of expanded patterns (or the original if no braces found).
    """
    brace = re.search(r"\{([^{}]+)\}", pattern)
    if brace is None:
        return [pattern]
    head = pattern[: brace.start()]
    tail = pattern[brace.end() :]
    return [head + alt.strip() + tail for alt in brace.group(1).split(",")]
def _collect_files(self, search_path: Path, glob_pattern: str | None) -> list[Path]:
    """Collect the set of files to search under *search_path*.

    Sensitive files (e.g. ``.env``, ``.netrc``, key material) are
    automatically excluded even when searched by explicit path so that
    credentials cannot leak into tool output. Directories listed in
    SKIP_DIRS are also pruned from the results.

    Args:
        search_path: File or directory to search.
        glob_pattern: Optional glob pattern to filter files.

    Returns:
        Sorted list of file paths to search (at most MAX_FILES entries).
    """
    if search_path.is_file():
        # Single-file search: still honor the sensitive-file blocklist.
        return [] if self._is_sensitive_file(search_path) else [search_path]

    globs = self._expand_brace_pattern(glob_pattern) if glob_pattern else ["*"]
    visited: set[Path] = set()
    collected: list[Path] = []
    candidates = chain.from_iterable(search_path.rglob(g) for g in globs)
    for candidate in candidates:
        # Expanded brace patterns may yield the same path twice.
        if not candidate.is_file() or candidate in visited:
            continue
        visited.add(candidate)
        # Skip anything inside hidden/build directories.
        rel_parts = candidate.relative_to(search_path).parts
        if any(part in SKIP_DIRS for part in rel_parts):
            continue
        if self._is_sensitive_file(candidate):
            continue
        collected.append(candidate)
        if len(collected) >= MAX_FILES:
            break
    return sorted(collected)
@staticmethod
def _safe_search(
    compiled_pattern: re.Pattern[str], line: str
) -> re.Match[str] | None:
    """Run a regex search with a per-line timeout to mitigate ReDoS.

    On platforms that support SIGALRM (Unix), a timeout is enforced.
    On Windows, the search runs without a timeout but is still bounded
    by MAX_LINE_LENGTH truncation applied earlier in the pipeline.

    Args:
        compiled_pattern: Compiled regex pattern.
        line: The text line to search.

    Returns:
        Match object if found, None otherwise (including on timeout).
    """
    if sys.platform == "win32":
        return compiled_pattern.search(line)

    def _timeout_handler(signum: int, frame: object) -> None:
        raise TimeoutError

    try:
        old_handler = signal.signal(signal.SIGALRM, _timeout_handler)
    except ValueError:
        # signal.signal() raises ValueError outside the main thread of the
        # main interpreter. Tools frequently run in worker threads, so fall
        # back to an unguarded search instead of crashing the whole search.
        return compiled_pattern.search(line)
    signal.alarm(REGEX_MATCH_TIMEOUT_SECONDS)
    try:
        return compiled_pattern.search(line)
    except TimeoutError:
        return None
    finally:
        # Always cancel the pending alarm and restore the previous handler.
        signal.alarm(0)
        signal.signal(signal.SIGALRM, old_handler)
@staticmethod
def _is_sensitive_file(file_path: Path) -> bool:
    """Check whether a file is likely to contain secrets or credentials.

    The check is deliberately conservative — it matches exact file names
    (e.g. ``.env``, ``.netrc``), any name beginning with ``.env``
    (``.env.local``, ``.env.production``, ...), common key/certificate
    extensions, and well-known sensitive path suffixes such as
    ``.aws/credentials`` or ``.ssh/id_rsa``.

    Args:
        file_path: Path to the file.

    Returns:
        True if the file should be skipped.
    """
    name = file_path.name
    # Exact-name match, or any .env variant (.env.backup, .env.staging.old, ...)
    if name in SENSITIVE_FILE_NAMES or name.startswith(".env"):
        return True
    # Extension-based match for key/cert material (endswith takes a tuple).
    if name.endswith(SENSITIVE_FILE_PATTERNS):
        return True
    # Match every trailing slice of the path against known sensitive
    # dir/file combinations (e.g. ".aws/credentials", ".ssh/id_rsa").
    segments = file_path.parts
    return any(
        "/".join(segments[i:]) in SENSITIVE_FILE_NAMES
        for i in range(len(segments))
    )
def _is_binary_file(self, file_path: Path) -> bool:
    """Check if a file is binary by looking for null bytes.

    Only the first BINARY_CHECK_SIZE bytes are examined. Unreadable
    files are treated as binary so they are silently skipped.

    Args:
        file_path: Path to the file.

    Returns:
        True if the file appears to be binary (or cannot be read).
    """
    try:
        with open(file_path, "rb") as f:
            chunk = f.read(BINARY_CHECK_SIZE)
        return b"\x00" in chunk
    except OSError:
        # PermissionError is a subclass of OSError, so a single clause
        # covers all read failures.
        return True
def _search_file(
    self,
    file_path: Path,
    compiled_pattern: re.Pattern[str],
    context_lines: int,
) -> FileSearchResult | None:
    """Search a single file for matches.

    Sensitive, binary, oversized, and unreadable files are skipped by
    returning None, as are files with no matching lines.

    Args:
        file_path: Path to the file.
        compiled_pattern: Compiled regex pattern.
        context_lines: Number of context lines around matches.

    Returns:
        FileSearchResult if matches found, None otherwise.
    """
    if self._is_sensitive_file(file_path):
        return None
    if self._is_binary_file(file_path):
        return None
    # Skip files that are too large to safely read into memory
    try:
        file_size = file_path.stat().st_size
    except OSError:
        return None
    if file_size > self.max_file_size_bytes:
        return None
    try:
        with open(file_path, encoding="utf-8", errors="replace") as f:
            lines = f.readlines()
    except OSError:  # covers PermissionError, which subclasses OSError
        return None
    # Find matching line numbers (0-indexed), capped per file
    match_line_nums: list[int] = []
    for i, line in enumerate(lines):
        if self._safe_search(compiled_pattern, line):
            match_line_nums.append(i)
            if len(match_line_nums) >= MAX_MATCHES_PER_FILE:
                break
    if not match_line_nums:
        return None
    # O(1) membership for the match/context flag below; the previous list
    # lookup made group building O(matches * lines).
    match_set = set(match_line_nums)
    # Build groups of contiguous match blocks with context
    groups: list[list[MatchLine]] = []
    current_group: list[MatchLine] = []
    prev_end = -1
    for match_idx in match_line_nums:
        start = max(0, match_idx - context_lines)
        end = min(len(lines), match_idx + context_lines + 1)
        # If this block doesn't overlap with the previous, start a new group
        if start > prev_end and current_group:
            groups.append(current_group)
            current_group = []
        # Start at prev_end when overlapping so no line is emitted twice
        for i in range(max(start, prev_end), end):
            text = lines[i].rstrip("\n\r")
            if len(text) > MAX_LINE_LENGTH:
                text = text[:MAX_LINE_LENGTH] + "..."
            current_group.append(
                MatchLine(
                    line_number=i + 1,  # 1-indexed
                    text=text,
                    is_match=(i in match_set),
                )
            )
        prev_end = end
    if current_group:
        groups.append(current_group)
    return FileSearchResult(
        file_path=file_path,
        matches=groups,
        match_count=len(match_line_nums),
    )
def _format_content(
    self,
    results: list[FileSearchResult],
    include_line_numbers: bool,
) -> str:
    """Format results showing matching content.

    Each file's path is followed by its match groups; groups within a
    file are separated by ``--`` and files by a blank line.

    Args:
        results: List of file search results.
        include_line_numbers: Whether to include line numbers.

    Returns:
        Formatted string with file paths and matching lines.
    """
    chunks: list[str] = []
    for file_result in results:
        chunks.append(str(file_result.file_path))
        for group_idx, group in enumerate(file_result.matches):
            if group_idx:
                chunks.append("--")
            if include_line_numbers:
                chunks.extend(f"{ml.line_number}: {ml.text}" for ml in group)
            else:
                chunks.extend(ml.text for ml in group)
        chunks.append("")  # blank line between files
    return "\n".join(chunks).rstrip()
def _format_files_with_matches(self, results: list[FileSearchResult]) -> str:
    """Format results showing only file paths.

    Args:
        results: List of file search results.

    Returns:
        One file path per line.
    """
    paths = [str(result.file_path) for result in results]
    return "\n".join(paths)
def _format_count(self, results: list[FileSearchResult]) -> str:
    """Format results showing match counts per file.

    Args:
        results: List of file search results.

    Returns:
        Filepath and count per line ("path: N").
    """
    rows = [f"{result.file_path}: {result.match_count}" for result in results]
    return "\n".join(rows)

View File

@@ -0,0 +1,450 @@
"""Unit tests for GrepTool."""
from __future__ import annotations
from pathlib import Path
import pytest
from pydantic import ValidationError
from crewai_tools import GrepTool
from crewai_tools.tools.grep_tool.grep_tool import (
MAX_CONTEXT_LINES,
MAX_REGEX_LENGTH,
GrepToolSchema,
)
@pytest.fixture
def sample_dir(tmp_path: Path) -> Path:
    """Create a temp directory with sample files for testing.

    Layout: two Python sources under src/, a markdown doc, a binary file
    (contains null bytes), an empty file, and a .git directory that the
    tool must skip.
    """
    # src/main.py
    src = tmp_path / "src"
    src.mkdir()
    (src / "main.py").write_text(
        "def hello():\n"
        "    print('Hello, world!')\n"
        "\n"
        "def goodbye():\n"
        "    print('Goodbye, world!')\n"
        "\n"
        "class MyClass:\n"
        "    pass\n"
    )
    # src/utils.py
    (src / "utils.py").write_text(
        "import os\n"
        "\n"
        "def helper():\n"
        "    return os.getcwd()\n"
        "\n"
        "CONSTANT = 42\n"
    )
    # docs/readme.md
    docs = tmp_path / "docs"
    docs.mkdir()
    (docs / "readme.md").write_text(
        "# Project\n"
        "\n"
        "This is a sample project.\n"
        "It has multiple files.\n"
    )
    # data/binary.bin — null bytes trigger the binary-file skip
    data = tmp_path / "data"
    data.mkdir()
    (data / "binary.bin").write_bytes(b"\x00\x01\x02\x03\x04binary content")
    # empty.txt
    (tmp_path / "empty.txt").write_text("")
    # .git/config (should be skipped)
    git_dir = tmp_path / ".git"
    git_dir.mkdir()
    (git_dir / "config").write_text("[core]\n    repositoryformatversion = 0\n")
    return tmp_path
class TestGrepTool:
    """Tests for GrepTool core behavior (matching, output modes, filters)."""

    def setup_method(self) -> None:
        """Set up test fixtures.

        We use allow_unrestricted_paths=True so that tests using pytest's
        tmp_path (which lives outside the working directory) are not rejected
        by the path-restriction guard.
        """
        self.tool = GrepTool(allow_unrestricted_paths=True)

    def test_tool_metadata(self) -> None:
        """Test tool has correct name and description."""
        assert self.tool.name == "Search file contents"
        assert "search" in self.tool.description.lower() or "Search" in self.tool.description

    def test_args_schema(self) -> None:
        """Test that args_schema has correct fields and defaults."""
        schema = self.tool.args_schema
        fields = schema.model_fields
        # Only `pattern` is required; everything else has a default.
        assert "pattern" in fields
        assert fields["pattern"].is_required()
        assert "path" in fields
        assert not fields["path"].is_required()
        assert "glob_pattern" in fields
        assert not fields["glob_pattern"].is_required()
        assert "output_mode" in fields
        assert not fields["output_mode"].is_required()
        assert "case_insensitive" in fields
        assert not fields["case_insensitive"].is_required()
        assert "context_lines" in fields
        assert not fields["context_lines"].is_required()
        assert "include_line_numbers" in fields
        assert not fields["include_line_numbers"].is_required()

    def test_basic_pattern_match(self, sample_dir: Path) -> None:
        """Test simple string pattern found in output."""
        result = self.tool._run(pattern="Hello", path=str(sample_dir))
        assert "Hello" in result

    def test_regex_pattern(self, sample_dir: Path) -> None:
        """Test regex pattern matches function definitions."""
        result = self.tool._run(pattern=r"def\s+\w+", path=str(sample_dir))
        assert "def hello" in result
        assert "def goodbye" in result
        assert "def helper" in result

    def test_case_sensitive_default(self, sample_dir: Path) -> None:
        """Test that search is case-sensitive by default."""
        result = self.tool._run(pattern="hello", path=str(sample_dir))
        # "hello" (lowercase) appears in "def hello():" but not in "Hello, world!"
        assert "hello" in result
        # Verify it found the function definition line
        assert "def hello" in result

    def test_case_insensitive(self, sample_dir: Path) -> None:
        """Test case-insensitive matching."""
        result = self.tool._run(
            pattern="hello", path=str(sample_dir), case_insensitive=True
        )
        # Should match both "def hello():" and "Hello, world!"
        assert "hello" in result.lower()
        assert "Hello" in result

    def test_output_mode_content(self, sample_dir: Path) -> None:
        """Test content output mode shows file paths, line numbers, and text."""
        result = self.tool._run(
            pattern="CONSTANT", path=str(sample_dir), output_mode="content"
        )
        assert "utils.py" in result
        assert "CONSTANT" in result
        # Should have line numbers by default
        assert ": " in result

    def test_output_mode_files_with_matches(self, sample_dir: Path) -> None:
        """Test files_with_matches output mode shows only file paths."""
        result = self.tool._run(
            pattern="def", path=str(sample_dir), output_mode="files_with_matches"
        )
        assert "main.py" in result
        assert "utils.py" in result
        # Should not contain line content
        assert "print" not in result

    def test_output_mode_count(self, sample_dir: Path) -> None:
        """Test count output mode shows filepath: N format."""
        result = self.tool._run(
            pattern="def", path=str(sample_dir), output_mode="count"
        )
        # main.py has 2 def lines, utils.py has 1
        assert "main.py: 2" in result
        assert "utils.py: 1" in result

    def test_context_lines(self, sample_dir: Path) -> None:
        """Test surrounding context lines are included."""
        result = self.tool._run(
            pattern="CONSTANT", path=str(sample_dir), context_lines=2
        )
        # Two lines before CONSTANT = 42 is "    return os.getcwd()"
        assert "return os.getcwd()" in result
        assert "CONSTANT" in result

    def test_line_numbers_disabled(self, sample_dir: Path) -> None:
        """Test output without line number prefixes."""
        result = self.tool._run(
            pattern="CONSTANT",
            path=str(sample_dir),
            include_line_numbers=False,
        )
        assert "CONSTANT = 42" in result
        # Verify no line number prefix (e.g., "6: ")
        for line in result.strip().split("\n"):
            if "CONSTANT" in line:
                assert not line[0].isdigit() or ": " not in line

    def test_glob_pattern_filtering(self, sample_dir: Path) -> None:
        """Test glob pattern filters to specific file types."""
        result = self.tool._run(
            pattern="project",
            path=str(sample_dir),
            glob_pattern="*.py",
            case_insensitive=True,
        )
        # "project" appears in readme.md but not in .py files
        assert "No matches found" in result

    def test_search_single_file(self, sample_dir: Path) -> None:
        """Test searching a single file by path."""
        file_path = str(sample_dir / "src" / "main.py")
        result = self.tool._run(pattern="def", path=file_path)
        assert "def hello" in result
        assert "def goodbye" in result
        # Should not include results from other files
        assert "helper" not in result

    def test_path_not_found(self) -> None:
        """Test error message when a relative path doesn't exist."""
        result = self.tool._run(pattern="test", path="totally_nonexistent_subdir")
        assert "Error" in result
        assert "does not exist" in result

    def test_invalid_regex(self, sample_dir: Path) -> None:
        """Test error message for invalid regex patterns."""
        result = self.tool._run(pattern="[invalid", path=str(sample_dir))
        assert "Error" in result
        assert "Invalid regex" in result

    def test_binary_files_skipped(self, sample_dir: Path) -> None:
        """Test binary files are not included in results."""
        result = self.tool._run(pattern="binary", path=str(sample_dir))
        # binary.bin has null bytes so it should be skipped
        assert "binary.bin" not in result

    def test_no_matches_found(self, sample_dir: Path) -> None:
        """Test message when no matches are found."""
        result = self.tool._run(
            pattern="zzz_nonexistent_pattern_zzz", path=str(sample_dir)
        )
        assert "No matches found" in result

    def test_hidden_dirs_skipped(self, sample_dir: Path) -> None:
        """Test that .git/ directory contents are not searched."""
        result = self.tool._run(pattern="repositoryformatversion", path=str(sample_dir))
        assert "No matches found" in result

    def test_empty_file(self, sample_dir: Path) -> None:
        """Test searching an empty file doesn't crash."""
        result = self.tool._run(
            pattern="anything", path=str(sample_dir / "empty.txt")
        )
        assert "No matches found" in result

    def test_run_with_kwargs(self, sample_dir: Path) -> None:
        """Test _run ignores extra kwargs."""
        result = self.tool._run(
            pattern="Hello", path=str(sample_dir), extra_arg="ignored"
        )
        assert "Hello" in result
class TestPathRestriction:
    """Tests for path traversal prevention and allow_unrestricted_paths."""

    def test_absolute_path_outside_cwd_blocked(self, tmp_path: Path) -> None:
        """An absolute path outside cwd is rejected by default."""
        tool = GrepTool()
        # tmp_path is almost certainly not under os.getcwd()
        result = tool._run(pattern="anything", path=str(tmp_path))
        assert "Error" in result
        assert "outside the working directory" in result

    def test_relative_traversal_blocked(self, sample_dir: Path) -> None:
        """A relative path with ../ that escapes cwd is rejected."""
        tool = GrepTool()
        result = tool._run(pattern="anything", path="../../etc")
        assert "Error" in result
        assert "outside the working directory" in result

    def test_relative_path_within_cwd_allowed(self) -> None:
        """A relative path that stays inside cwd works fine."""
        tool = GrepTool()
        # "." is always within cwd
        result = tool._run(pattern="zzz_will_not_match_anything_zzz", path=".")
        # Should not get a traversal error — either matches or "No matches found"
        assert "outside the working directory" not in result

    def test_allow_unrestricted_paths_bypasses_check(self, tmp_path: Path) -> None:
        """With allow_unrestricted_paths=True, absolute paths outside cwd are allowed."""
        # Write a searchable file in tmp_path
        (tmp_path / "hello.txt").write_text("unrestricted search target\n")
        tool = GrepTool(allow_unrestricted_paths=True)
        result = tool._run(pattern="unrestricted", path=str(tmp_path))
        assert "unrestricted search target" in result

    def test_allow_unrestricted_defaults_false(self) -> None:
        """The flag defaults to False."""
        tool = GrepTool()
        assert tool.allow_unrestricted_paths is False

    def test_error_message_includes_hint(self, tmp_path: Path) -> None:
        """The traversal error tells the user how to opt in."""
        tool = GrepTool()
        result = tool._run(pattern="x", path=str(tmp_path))
        assert "GrepTool(allow_unrestricted_paths=True)" in result
class TestReDoSGuards:
    """Tests for regex denial-of-service mitigations."""

    def test_pattern_length_rejected(self, sample_dir: Path) -> None:
        """Patterns exceeding MAX_REGEX_LENGTH are rejected before compilation."""
        tool = GrepTool(allow_unrestricted_paths=True)
        long_pattern = "a" * (MAX_REGEX_LENGTH + 1)
        result = tool._run(pattern=long_pattern, path=str(sample_dir))
        assert "Error" in result
        assert "Pattern too long" in result

    def test_pattern_at_max_length_accepted(self, sample_dir: Path) -> None:
        """A pattern exactly at MAX_REGEX_LENGTH is allowed (boundary check)."""
        tool = GrepTool(allow_unrestricted_paths=True)
        exact_pattern = "a" * MAX_REGEX_LENGTH
        result = tool._run(pattern=exact_pattern, path=str(sample_dir))
        # Should not get a length error — either matches or "No matches found"
        assert "Pattern too long" not in result

    def test_safe_search_returns_match(self) -> None:
        """_safe_search returns a match object for a normal pattern."""
        # Plain import instead of the obscure __import__("re") idiom;
        # local because the module header does not import re.
        import re

        compiled = re.compile(r"hello")
        match = GrepTool._safe_search(compiled, "say hello world")
        assert match is not None
        assert match.group() == "hello"

    def test_safe_search_returns_none_on_no_match(self) -> None:
        """_safe_search returns None when the pattern doesn't match."""
        import re

        compiled = re.compile(r"zzz")
        match = GrepTool._safe_search(compiled, "hello world")
        assert match is None
class TestBraceExpansion:
    """Tests for glob brace expansion ({a,b} syntax)."""

    def test_expand_simple_brace(self) -> None:
        """*.{py,txt} expands to ['*.py', '*.txt']."""
        result = GrepTool._expand_brace_pattern("*.{py,txt}")
        assert result == ["*.py", "*.txt"]

    def test_expand_three_alternatives(self) -> None:
        """*.{py,txt,md} expands to three patterns."""
        result = GrepTool._expand_brace_pattern("*.{py,txt,md}")
        assert result == ["*.py", "*.txt", "*.md"]

    def test_expand_no_braces_passthrough(self) -> None:
        """A pattern without braces is returned as a single-element list."""
        result = GrepTool._expand_brace_pattern("*.py")
        assert result == ["*.py"]

    def test_expand_strips_whitespace(self) -> None:
        """Whitespace around alternatives inside braces is stripped."""
        result = GrepTool._expand_brace_pattern("*.{ py , txt }")
        assert result == ["*.py", "*.txt"]

    def test_expand_prefix_and_suffix(self) -> None:
        """Prefix and suffix around the braces are preserved."""
        result = GrepTool._expand_brace_pattern("src/*.{py,pyi}.bak")
        assert result == ["src/*.py.bak", "src/*.pyi.bak"]

    def test_brace_glob_end_to_end(self, tmp_path: Path) -> None:
        """Brace expansion works end-to-end with _collect_files."""
        (tmp_path / "a.py").write_text("match_me\n")
        (tmp_path / "b.txt").write_text("match_me\n")
        (tmp_path / "c.md").write_text("match_me\n")
        tool = GrepTool(allow_unrestricted_paths=True)
        result = tool._run(
            pattern="match_me",
            path=str(tmp_path),
            glob_pattern="*.{py,txt}",
        )
        assert "a.py" in result
        assert "b.txt" in result
        # .md should NOT be included
        assert "c.md" not in result

    def test_brace_glob_no_duplicates(self, tmp_path: Path) -> None:
        """Files are not reported twice when they match multiple expanded patterns."""
        (tmp_path / "x.py").write_text("unique_content\n")
        tool = GrepTool(allow_unrestricted_paths=True)
        result = tool._run(
            pattern="unique_content",
            path=str(tmp_path),
            glob_pattern="*.{py,py}",
            output_mode="count",
        )
        # Should appear exactly once
        assert result.count("x.py") == 1
class TestSensitiveFileProtection:
    """Tests for sensitive file exclusion (secrets leakage prevention)."""

    @pytest.mark.parametrize(
        "name",
        [".env", ".env.local", ".netrc", ".npmrc", "secrets.json", "server.pem"],
    )
    def test_sensitive_files_excluded(self, tmp_path: Path, name: str) -> None:
        """Sensitive files are skipped even if they contain matches."""
        (tmp_path / name).write_text("MATCH_ME\n")
        tool = GrepTool(allow_unrestricted_paths=True)
        result = tool._run(pattern="MATCH_ME", path=str(tmp_path))
        assert "No matches found" in result

    def test_sensitive_file_blocked_by_direct_path(self, tmp_path: Path) -> None:
        """A .env passed as the explicit path argument is still blocked."""
        env = tmp_path / ".env"
        env.write_text("SECRET=abc\n")
        tool = GrepTool(allow_unrestricted_paths=True)
        result = tool._run(pattern="SECRET", path=str(env))
        assert "No matches found" in result
class TestFileSizeLimit:
    """Tests for max_file_size_bytes guard."""

    def test_large_file_skipped(self, tmp_path: Path) -> None:
        """Files over max_file_size_bytes are skipped."""
        # 700 bytes of content vs. a 50-byte limit
        (tmp_path / "big.txt").write_text("needle\n" * 100)
        tool = GrepTool(allow_unrestricted_paths=True, max_file_size_bytes=50)
        result = tool._run(pattern="needle", path=str(tmp_path))
        assert "No matches found" in result

    def test_large_file_searched_with_raised_limit(self, tmp_path: Path) -> None:
        """Raising the limit lets the same file be searched."""
        (tmp_path / "big.txt").write_text("needle\n" * 100)
        tool = GrepTool(allow_unrestricted_paths=True, max_file_size_bytes=50_000)
        result = tool._run(pattern="needle", path=str(tmp_path))
        assert "needle" in result
class TestContextLinesUpperBound:
    """Pydantic validation bounds on the context_lines schema field."""

    def test_negative_rejected(self) -> None:
        """A value below zero fails schema validation."""
        with pytest.raises(ValidationError):
            GrepToolSchema(pattern="x", context_lines=-1)

    def test_over_max_rejected(self) -> None:
        """A value one past MAX_CONTEXT_LINES fails schema validation."""
        too_many = MAX_CONTEXT_LINES + 1
        with pytest.raises(ValidationError):
            GrepToolSchema(pattern="x", context_lines=too_many)

View File

@@ -5664,10 +5664,6 @@
"title": "Bucket Name",
"type": "string"
},
"cluster": {
"description": "An instance of the Couchbase Cluster connected to the desired Couchbase server.",
"title": "Cluster"
},
"collection_name": {
"description": "The name of the Couchbase collection to search",
"title": "Collection Name",
@@ -5716,7 +5712,6 @@
}
},
"required": [
"cluster",
"collection_name",
"scope_name",
"bucket_name",
@@ -10155,6 +10150,141 @@
"type": "object"
}
},
{
"description": "A tool that searches file contents on disk using regex patterns. Recursively searches files in a directory for matching lines. Returns matching content with line numbers, file paths only, or match counts.",
"env_vars": [],
"humanized_name": "Search file contents",
"init_params_schema": {
"$defs": {
"EnvVar": {
"properties": {
"default": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Default"
},
"description": {
"title": "Description",
"type": "string"
},
"name": {
"title": "Name",
"type": "string"
},
"required": {
"default": true,
"title": "Required",
"type": "boolean"
}
},
"required": [
"name",
"description"
],
"title": "EnvVar",
"type": "object"
}
},
"description": "Tool for searching file contents on disk using regex patterns.\n\nRecursively searches files in a directory for lines matching a regex pattern.\nSupports glob filtering, context lines, and multiple output modes.\n\nExample:\n >>> tool = GrepTool()\n >>> result = tool.run(pattern=\"def.*main\", path=\"src\")\n >>> result = tool.run(\n ... pattern=\"TODO\",\n ... glob_pattern=\"*.py\",\n ... context_lines=2,\n ... )\n\n To search any path on the filesystem (opt-in):\n >>> tool = GrepTool(allow_unrestricted_paths=True)\n >>> result = tool.run(pattern=\"error\", path=\"/var/log/app\")",
"properties": {
"allow_unrestricted_paths": {
"default": false,
"description": "When False (default), searches are restricted to the current working directory. Set to True to allow searching any path on the filesystem.",
"title": "Allow Unrestricted Paths",
"type": "boolean"
},
"max_file_size_bytes": {
"default": 10485760,
"description": "Maximum file size in bytes to search. Files larger than this are skipped. Defaults to 10 MB.",
"title": "Max File Size Bytes",
"type": "integer"
}
},
"title": "GrepTool",
"type": "object"
},
"name": "GrepTool",
"package_dependencies": [],
"run_params_schema": {
"description": "Schema for grep tool arguments.",
"properties": {
"case_insensitive": {
"default": false,
"description": "Whether to perform case-insensitive matching",
"title": "Case Insensitive",
"type": "boolean"
},
"context_lines": {
"default": 0,
"description": "Number of lines to show before and after each match (0-10)",
"maximum": 10,
"minimum": 0,
"title": "Context Lines",
"type": "integer"
},
"glob_pattern": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Glob pattern to filter files (e.g. '*.py'). Supports brace expansion (e.g. '*.{ts,tsx}').",
"title": "Glob Pattern"
},
"include_line_numbers": {
"default": true,
"description": "Whether to prefix matching lines with line numbers",
"title": "Include Line Numbers",
"type": "boolean"
},
"output_mode": {
"default": "content",
"description": "Output mode: 'content' shows matching lines, 'files_with_matches' shows only file paths, 'count' shows match counts per file",
"enum": [
"content",
"files_with_matches",
"count"
],
"title": "Output Mode",
"type": "string"
},
"path": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "File or directory to search in. Defaults to current working directory.",
"title": "Path"
},
"pattern": {
"description": "Regex pattern to search for in file contents",
"title": "Pattern",
"type": "string"
}
},
"required": [
"pattern"
],
"title": "GrepToolSchema",
"type": "object"
}
},
{
"description": "Scrape or crawl a website using Hyperbrowser and return the contents in properly formatted markdown or html",
"env_vars": [
@@ -14460,13 +14590,9 @@
"properties": {
"config": {
"$ref": "#/$defs/OxylabsAmazonProductScraperConfig"
},
"oxylabs_api": {
"title": "Oxylabs Api"
}
},
"required": [
"oxylabs_api",
"config"
],
"title": "OxylabsAmazonProductScraperTool",
@@ -14689,13 +14815,9 @@
"properties": {
"config": {
"$ref": "#/$defs/OxylabsAmazonSearchScraperConfig"
},
"oxylabs_api": {
"title": "Oxylabs Api"
}
},
"required": [
"oxylabs_api",
"config"
],
"title": "OxylabsAmazonSearchScraperTool",
@@ -14931,13 +15053,9 @@
"properties": {
"config": {
"$ref": "#/$defs/OxylabsGoogleSearchScraperConfig"
},
"oxylabs_api": {
"title": "Oxylabs Api"
}
},
"required": [
"oxylabs_api",
"config"
],
"title": "OxylabsGoogleSearchScraperTool",
@@ -15121,13 +15239,9 @@
"properties": {
"config": {
"$ref": "#/$defs/OxylabsUniversalScraperConfig"
},
"oxylabs_api": {
"title": "Oxylabs Api"
}
},
"required": [
"oxylabs_api",
"config"
],
"title": "OxylabsUniversalScraperTool",
@@ -23229,26 +23343,6 @@
"description": "The Tavily API key. If not provided, it will be loaded from the environment variable TAVILY_API_KEY.",
"title": "Api Key"
},
"async_client": {
"anyOf": [
{},
{
"type": "null"
}
],
"default": null,
"title": "Async Client"
},
"client": {
"anyOf": [
{},
{
"type": "null"
}
],
"default": null,
"title": "Client"
},
"extract_depth": {
"default": "basic",
"description": "The depth of extraction. 'basic' for basic extraction, 'advanced' for advanced extraction.",
@@ -23384,26 +23478,6 @@
"description": "The Tavily API key. If not provided, it will be loaded from the environment variable TAVILY_API_KEY.",
"title": "Api Key"
},
"async_client": {
"anyOf": [
{},
{
"type": "null"
}
],
"default": null,
"title": "Async Client"
},
"client": {
"anyOf": [
{},
{
"type": "null"
}
],
"default": null,
"title": "Client"
},
"days": {
"default": 7,
"description": "The number of days to search back.",