mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-03-17 09:18:18 +00:00
Compare commits
19 Commits
gl/chore/r
...
lorenze/fe
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2c78e60f56 | ||
|
|
8e336a476f | ||
|
|
2d0e81c10d | ||
|
|
c8dd6c006c | ||
|
|
73f44c878d | ||
|
|
364143a682 | ||
|
|
f894d8cf9d | ||
|
|
1f0265781a | ||
|
|
9fae6c0adf | ||
|
|
dea2e1e715 | ||
|
|
b97fc83656 | ||
|
|
925ed7850e | ||
|
|
ec2b6a0287 | ||
|
|
25835ca795 | ||
|
|
e65940816b | ||
|
|
ad2435f5c1 | ||
|
|
c9971a7418 | ||
|
|
f04bedc9ab | ||
|
|
5a14007511 |
@@ -88,6 +88,7 @@ from crewai_tools.tools.generate_crewai_automation_tool.generate_crewai_automati
|
||||
GenerateCrewaiAutomationTool,
|
||||
)
|
||||
from crewai_tools.tools.github_search_tool.github_search_tool import GithubSearchTool
|
||||
from crewai_tools.tools.grep_tool.grep_tool import GrepTool
|
||||
from crewai_tools.tools.hyperbrowser_load_tool.hyperbrowser_load_tool import (
|
||||
HyperbrowserLoadTool,
|
||||
)
|
||||
@@ -248,6 +249,7 @@ __all__ = [
|
||||
"FirecrawlSearchTool",
|
||||
"GenerateCrewaiAutomationTool",
|
||||
"GithubSearchTool",
|
||||
"GrepTool",
|
||||
"HyperbrowserLoadTool",
|
||||
"InvokeCrewAIAutomationTool",
|
||||
"JSONSearchTool",
|
||||
|
||||
@@ -77,6 +77,7 @@ from crewai_tools.tools.generate_crewai_automation_tool.generate_crewai_automati
|
||||
GenerateCrewaiAutomationTool,
|
||||
)
|
||||
from crewai_tools.tools.github_search_tool.github_search_tool import GithubSearchTool
|
||||
from crewai_tools.tools.grep_tool.grep_tool import GrepTool
|
||||
from crewai_tools.tools.hyperbrowser_load_tool.hyperbrowser_load_tool import (
|
||||
HyperbrowserLoadTool,
|
||||
)
|
||||
@@ -232,6 +233,7 @@ __all__ = [
|
||||
"FirecrawlSearchTool",
|
||||
"GenerateCrewaiAutomationTool",
|
||||
"GithubSearchTool",
|
||||
"GrepTool",
|
||||
"HyperbrowserLoadTool",
|
||||
"InvokeCrewAIAutomationTool",
|
||||
"JSONSearchTool",
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
# Public re-export for the grep tool package.
from crewai_tools.tools.grep_tool.grep_tool import GrepTool

__all__ = ["GrepTool"]
|
||||
542
lib/crewai-tools/src/crewai_tools/tools/grep_tool/grep_tool.py
Normal file
542
lib/crewai-tools/src/crewai_tools/tools/grep_tool/grep_tool.py
Normal file
@@ -0,0 +1,542 @@
|
||||
"""Tool for searching file contents on disk using regex patterns."""
|
||||
|
||||
from __future__ import annotations

from dataclasses import dataclass, field
from itertools import chain
import os
from pathlib import Path
import re
import signal
import sys
import threading
from typing import Literal

from crewai.tools import BaseTool
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
# Output and resource limits — keep tool output bounded and searches cheap.
MAX_OUTPUT_CHARS = 50_000
MAX_FILES = 10_000
MAX_MATCHES_PER_FILE = 200
MAX_LINE_LENGTH = 500
BINARY_CHECK_SIZE = 8192
MAX_REGEX_LENGTH = 1_000
REGEX_MATCH_TIMEOUT_SECONDS = 5
MAX_CONTEXT_LINES = 10
MAX_FILE_SIZE_BYTES = 10 * 1024 * 1024  # 10 MB

# Directory names never descended into (VCS metadata, caches, virtualenvs).
SKIP_DIRS = frozenset(
    {
        ".git",
        "__pycache__",
        "node_modules",
        ".venv",
        "venv",
        ".tox",
        ".mypy_cache",
        ".pytest_cache",
    }
)

# File names that may contain secrets or credentials — always excluded from
# search results to prevent accidental sensitive-content leakage.
SENSITIVE_FILE_NAMES = frozenset(
    {
        ".env",
        ".env.local",
        ".env.development",
        ".env.production",
        ".env.staging",
        ".env.test",
        ".netrc",
        ".npmrc",
        ".pypirc",
        ".docker/config.json",
        ".aws/credentials",
        ".ssh/id_rsa",
        ".ssh/id_ed25519",
        ".ssh/id_ecdsa",
        ".ssh/id_dsa",
        "credentials.json",
        "service-account.json",
        "secrets.yaml",
        "secrets.yml",
        "secrets.json",
    }
)

# Glob-style suffixes that indicate sensitive content (matched against the
# full file name, e.g. "app.env.bak" won't match, but ".env.bak" will).
SENSITIVE_FILE_PATTERNS = (
    ".pem",
    ".key",
    ".p12",
    ".pfx",
    ".jks",
    ".keystore",
)
|
||||
|
||||
|
||||
@dataclass
class MatchLine:
    """A single line from a search result."""

    line_number: int  # 1-indexed line number within the searched file
    text: str  # line content (truncated by the caller to MAX_LINE_LENGTH)
    is_match: bool  # True for match, False for context line
|
||||
|
||||
|
||||
@dataclass
class FileSearchResult:
    """Search results for a single file."""

    file_path: Path  # path of the searched file
    # Groups of contiguous match/context lines; default_factory keeps each
    # instance's list independent.
    matches: list[list[MatchLine]] = field(default_factory=list)
    match_count: int = 0  # number of matching lines (context excluded)
|
||||
|
||||
|
||||
class GrepToolSchema(BaseModel):
    """Schema for grep tool arguments."""

    pattern: str = Field(
        ..., description="Regex pattern to search for in file contents"
    )
    path: str | None = Field(
        default=None,
        description="File or directory to search in. Defaults to current working directory.",
    )
    glob_pattern: str | None = Field(
        default=None,
        description="Glob pattern to filter files (e.g. '*.py'). Supports brace expansion (e.g. '*.{ts,tsx}').",
    )
    output_mode: Literal["content", "files_with_matches", "count"] = Field(
        default="content",
        description="Output mode: 'content' shows matching lines, 'files_with_matches' shows only file paths, 'count' shows match counts per file",
    )
    case_insensitive: bool = Field(
        default=False,
        description="Whether to perform case-insensitive matching",
    )
    context_lines: int = Field(
        default=0,
        ge=0,
        le=MAX_CONTEXT_LINES,
        description=f"Number of lines to show before and after each match (0-{MAX_CONTEXT_LINES})",
    )
    include_line_numbers: bool = Field(
        default=True,
        description="Whether to prefix matching lines with line numbers",
    )
|
||||
|
||||
|
||||
class GrepTool(BaseTool):
    """Tool for searching file contents on disk using regex patterns.

    Recursively searches files in a directory for lines matching a regex pattern.
    Supports glob filtering, context lines, and multiple output modes.

    Example:
        >>> tool = GrepTool()
        >>> result = tool.run(pattern="def.*main", path="src")
        >>> result = tool.run(
        ...     pattern="TODO",
        ...     glob_pattern="*.py",
        ...     context_lines=2,
        ... )

    To search any path on the filesystem (opt-in):
        >>> tool = GrepTool(allow_unrestricted_paths=True)
        >>> result = tool.run(pattern="error", path="/var/log/app")
    """

    name: str = "Search file contents"
    description: str = (
        "A tool that searches file contents on disk using regex patterns. "
        "Recursively searches files in a directory for matching lines. "
        "Returns matching content with line numbers, file paths only, or match counts."
    )
    args_schema: type[BaseModel] = GrepToolSchema
    allow_unrestricted_paths: bool = Field(
        default=False,
        description=(
            "When False (default), searches are restricted to the current working "
            "directory. Set to True to allow searching any path on the filesystem."
        ),
    )
    max_file_size_bytes: int = Field(
        default=MAX_FILE_SIZE_BYTES,
        description=(
            "Maximum file size in bytes to search. Files larger than this are "
            "skipped. Defaults to 10 MB."
        ),
    )
|
||||
|
||||
def _run(
|
||||
self,
|
||||
pattern: str,
|
||||
path: str | None = None,
|
||||
glob_pattern: str | None = None,
|
||||
output_mode: Literal["content", "files_with_matches", "count"] = "content",
|
||||
case_insensitive: bool = False,
|
||||
context_lines: int = 0,
|
||||
include_line_numbers: bool = True,
|
||||
**kwargs: object,
|
||||
) -> str:
|
||||
"""Search files for a regex pattern.
|
||||
|
||||
Args:
|
||||
pattern: Regex pattern to search for.
|
||||
path: File or directory to search. Defaults to cwd.
|
||||
glob_pattern: Glob pattern to filter files.
|
||||
output_mode: What to return.
|
||||
case_insensitive: Case-insensitive matching.
|
||||
context_lines: Lines of context around matches.
|
||||
include_line_numbers: Prefix lines with line numbers.
|
||||
|
||||
Returns:
|
||||
Formatted search results as a string.
|
||||
"""
|
||||
# Resolve search path — constrained to cwd unless unrestricted
|
||||
cwd = Path(os.getcwd()).resolve()
|
||||
if path:
|
||||
candidate = Path(path)
|
||||
if candidate.is_absolute():
|
||||
search_path = candidate.resolve()
|
||||
else:
|
||||
search_path = (cwd / candidate).resolve()
|
||||
# Prevent traversal outside the working directory (unless opted in)
|
||||
if not self.allow_unrestricted_paths:
|
||||
try:
|
||||
search_path.relative_to(cwd)
|
||||
except ValueError:
|
||||
return (
|
||||
f"Error: Path '{path}' is outside the working directory. "
|
||||
"Initialize with GrepTool(allow_unrestricted_paths=True) to allow this."
|
||||
)
|
||||
else:
|
||||
search_path = cwd
|
||||
if not search_path.exists():
|
||||
return f"Error: Path '{search_path}' does not exist."
|
||||
|
||||
# Compile regex with length guard to mitigate ReDoS
|
||||
if len(pattern) > MAX_REGEX_LENGTH:
|
||||
return f"Error: Pattern too long ({len(pattern)} chars). Maximum is {MAX_REGEX_LENGTH}."
|
||||
flags = re.IGNORECASE if case_insensitive else 0
|
||||
try:
|
||||
compiled = re.compile(pattern, flags)
|
||||
except re.error as e:
|
||||
return f"Error: Invalid regex pattern '{pattern}': {e}"
|
||||
|
||||
# Collect files
|
||||
files = self._collect_files(search_path, glob_pattern)
|
||||
|
||||
# Search each file
|
||||
results: list[FileSearchResult] = []
|
||||
for file_path in files:
|
||||
result = self._search_file(file_path, compiled, context_lines)
|
||||
if result is not None:
|
||||
results.append(result)
|
||||
|
||||
if not results:
|
||||
return "No matches found."
|
||||
|
||||
# Format output
|
||||
if output_mode == "files_with_matches":
|
||||
output = self._format_files_with_matches(results)
|
||||
elif output_mode == "count":
|
||||
output = self._format_count(results)
|
||||
else:
|
||||
output = self._format_content(results, include_line_numbers)
|
||||
|
||||
# Truncate if needed
|
||||
if len(output) > MAX_OUTPUT_CHARS:
|
||||
output = (
|
||||
output[:MAX_OUTPUT_CHARS]
|
||||
+ "\n\n... Output truncated. Try a narrower search pattern or glob filter."
|
||||
)
|
||||
|
||||
return output
|
||||
|
||||
@staticmethod
|
||||
def _expand_brace_pattern(pattern: str) -> list[str]:
|
||||
"""Expand a simple brace pattern into individual globs.
|
||||
|
||||
Handles a single level of brace expansion, e.g.
|
||||
``*.{py,txt}`` -> ``['*.py', '*.txt']``.
|
||||
Nested braces are *not* supported and the pattern is returned as-is.
|
||||
|
||||
Args:
|
||||
pattern: Glob pattern that may contain ``{a,b,...}`` syntax.
|
||||
|
||||
Returns:
|
||||
List of expanded patterns (or the original if no braces found).
|
||||
"""
|
||||
match = re.search(r"\{([^{}]+)\}", pattern)
|
||||
if not match:
|
||||
return [pattern]
|
||||
prefix = pattern[: match.start()]
|
||||
suffix = pattern[match.end() :]
|
||||
alternatives = match.group(1).split(",")
|
||||
return [f"{prefix}{alt.strip()}{suffix}" for alt in alternatives]
|
||||
|
||||
def _collect_files(self, search_path: Path, glob_pattern: str | None) -> list[Path]:
|
||||
"""Collect files to search.
|
||||
|
||||
Sensitive files (e.g. ``.env``, ``.netrc``, key material) are
|
||||
automatically excluded even when searched by explicit path so that
|
||||
credentials cannot leak into tool output.
|
||||
|
||||
Args:
|
||||
search_path: File or directory to search.
|
||||
glob_pattern: Optional glob pattern to filter files.
|
||||
|
||||
Returns:
|
||||
List of file paths to search.
|
||||
"""
|
||||
if search_path.is_file():
|
||||
if self._is_sensitive_file(search_path):
|
||||
return []
|
||||
return [search_path]
|
||||
|
||||
patterns = self._expand_brace_pattern(glob_pattern) if glob_pattern else ["*"]
|
||||
seen: set[Path] = set()
|
||||
files: list[Path] = []
|
||||
for p in chain.from_iterable(search_path.rglob(pat) for pat in patterns):
|
||||
if not p.is_file():
|
||||
continue
|
||||
if p in seen:
|
||||
continue
|
||||
seen.add(p)
|
||||
# Skip hidden/build directories
|
||||
if any(part in SKIP_DIRS for part in p.relative_to(search_path).parts):
|
||||
continue
|
||||
if self._is_sensitive_file(p):
|
||||
continue
|
||||
files.append(p)
|
||||
if len(files) >= MAX_FILES:
|
||||
break
|
||||
|
||||
return sorted(files)
|
||||
|
||||
@staticmethod
|
||||
def _safe_search(
|
||||
compiled_pattern: re.Pattern[str], line: str
|
||||
) -> re.Match[str] | None:
|
||||
"""Run a regex search with a per-line timeout to mitigate ReDoS.
|
||||
|
||||
On platforms that support SIGALRM (Unix), a timeout is enforced.
|
||||
On Windows, the search runs without a timeout but is still bounded
|
||||
by MAX_LINE_LENGTH truncation applied earlier in the pipeline.
|
||||
|
||||
Args:
|
||||
compiled_pattern: Compiled regex pattern.
|
||||
line: The text line to search.
|
||||
|
||||
Returns:
|
||||
Match object if found, None otherwise (including on timeout).
|
||||
"""
|
||||
if sys.platform == "win32":
|
||||
return compiled_pattern.search(line)
|
||||
|
||||
def _timeout_handler(signum: int, frame: object) -> None:
|
||||
raise TimeoutError
|
||||
|
||||
old_handler = signal.signal(signal.SIGALRM, _timeout_handler)
|
||||
signal.alarm(REGEX_MATCH_TIMEOUT_SECONDS)
|
||||
try:
|
||||
return compiled_pattern.search(line)
|
||||
except TimeoutError:
|
||||
return None
|
||||
finally:
|
||||
signal.alarm(0)
|
||||
signal.signal(signal.SIGALRM, old_handler)
|
||||
|
||||
@staticmethod
|
||||
def _is_sensitive_file(file_path: Path) -> bool:
|
||||
"""Check whether a file is likely to contain secrets or credentials.
|
||||
|
||||
The check is deliberately conservative — it matches exact file names
|
||||
(e.g. ``.env``, ``.netrc``) as well as common key/certificate
|
||||
extensions. Files whose *name* starts with ``.env`` (including
|
||||
variants like ``.env.local``, ``.env.production``, etc.) are also
|
||||
excluded.
|
||||
|
||||
Args:
|
||||
file_path: Path to the file.
|
||||
|
||||
Returns:
|
||||
True if the file should be skipped.
|
||||
"""
|
||||
name = file_path.name
|
||||
|
||||
# Exact-name match (e.g. ".env", ".netrc", "secrets.json")
|
||||
if name in SENSITIVE_FILE_NAMES:
|
||||
return True
|
||||
|
||||
# Any .env variant (.env.backup, .env.staging.old, …)
|
||||
if name.startswith(".env"):
|
||||
return True
|
||||
|
||||
# Extension-based match for key/cert material
|
||||
if any(name.endswith(ext) for ext in SENSITIVE_FILE_PATTERNS):
|
||||
return True
|
||||
|
||||
# Check path components for well-known sensitive dirs/files
|
||||
# e.g. ".aws/credentials" or ".ssh/id_rsa"
|
||||
parts = file_path.parts
|
||||
for i, _part in enumerate(parts):
|
||||
remaining = "/".join(parts[i:])
|
||||
if remaining in SENSITIVE_FILE_NAMES:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def _is_binary_file(self, file_path: Path) -> bool:
|
||||
"""Check if a file is binary by looking for null bytes.
|
||||
|
||||
Args:
|
||||
file_path: Path to the file.
|
||||
|
||||
Returns:
|
||||
True if the file appears to be binary.
|
||||
"""
|
||||
try:
|
||||
with open(file_path, "rb") as f:
|
||||
chunk = f.read(BINARY_CHECK_SIZE)
|
||||
return b"\x00" in chunk
|
||||
except (OSError, PermissionError):
|
||||
return True
|
||||
|
||||
def _search_file(
|
||||
self,
|
||||
file_path: Path,
|
||||
compiled_pattern: re.Pattern[str],
|
||||
context_lines: int,
|
||||
) -> FileSearchResult | None:
|
||||
"""Search a single file for matches.
|
||||
|
||||
Args:
|
||||
file_path: Path to the file.
|
||||
compiled_pattern: Compiled regex pattern.
|
||||
context_lines: Number of context lines around matches.
|
||||
|
||||
Returns:
|
||||
FileSearchResult if matches found, None otherwise.
|
||||
"""
|
||||
if self._is_sensitive_file(file_path):
|
||||
return None
|
||||
|
||||
if self._is_binary_file(file_path):
|
||||
return None
|
||||
|
||||
# Skip files that are too large to safely read into memory
|
||||
try:
|
||||
file_size = file_path.stat().st_size
|
||||
except OSError:
|
||||
return None
|
||||
if file_size > self.max_file_size_bytes:
|
||||
return None
|
||||
|
||||
try:
|
||||
with open(file_path, encoding="utf-8", errors="replace") as f:
|
||||
lines = f.readlines()
|
||||
except (OSError, PermissionError):
|
||||
return None
|
||||
|
||||
# Find matching line numbers
|
||||
match_line_nums: list[int] = []
|
||||
for i, line in enumerate(lines):
|
||||
if self._safe_search(compiled_pattern, line):
|
||||
match_line_nums.append(i)
|
||||
if len(match_line_nums) >= MAX_MATCHES_PER_FILE:
|
||||
break
|
||||
|
||||
if not match_line_nums:
|
||||
return None
|
||||
|
||||
# Build groups of contiguous match blocks with context
|
||||
groups: list[list[MatchLine]] = []
|
||||
current_group: list[MatchLine] = []
|
||||
prev_end = -1
|
||||
|
||||
for match_idx in match_line_nums:
|
||||
start = max(0, match_idx - context_lines)
|
||||
end = min(len(lines), match_idx + context_lines + 1)
|
||||
|
||||
# If this block doesn't overlap with the previous, start a new group
|
||||
if start > prev_end and current_group:
|
||||
groups.append(current_group)
|
||||
current_group = []
|
||||
|
||||
for i in range(max(start, prev_end), end):
|
||||
text = lines[i].rstrip("\n\r")
|
||||
if len(text) > MAX_LINE_LENGTH:
|
||||
text = text[:MAX_LINE_LENGTH] + "..."
|
||||
current_group.append(
|
||||
MatchLine(
|
||||
line_number=i + 1, # 1-indexed
|
||||
text=text,
|
||||
is_match=(i in match_line_nums),
|
||||
)
|
||||
)
|
||||
|
||||
prev_end = end
|
||||
|
||||
if current_group:
|
||||
groups.append(current_group)
|
||||
|
||||
return FileSearchResult(
|
||||
file_path=file_path,
|
||||
matches=groups,
|
||||
match_count=len(match_line_nums),
|
||||
)
|
||||
|
||||
def _format_content(
|
||||
self,
|
||||
results: list[FileSearchResult],
|
||||
include_line_numbers: bool,
|
||||
) -> str:
|
||||
"""Format results showing matching content.
|
||||
|
||||
Args:
|
||||
results: List of file search results.
|
||||
include_line_numbers: Whether to include line numbers.
|
||||
|
||||
Returns:
|
||||
Formatted string with file paths and matching lines.
|
||||
"""
|
||||
parts: list[str] = []
|
||||
for result in results:
|
||||
parts.append(str(result.file_path))
|
||||
for group_idx, group in enumerate(result.matches):
|
||||
if group_idx > 0:
|
||||
parts.append("--")
|
||||
for match_line in group:
|
||||
if include_line_numbers:
|
||||
parts.append(f"{match_line.line_number}: {match_line.text}")
|
||||
else:
|
||||
parts.append(match_line.text)
|
||||
parts.append("") # blank line between files
|
||||
return "\n".join(parts).rstrip()
|
||||
|
||||
def _format_files_with_matches(self, results: list[FileSearchResult]) -> str:
|
||||
"""Format results showing only file paths.
|
||||
|
||||
Args:
|
||||
results: List of file search results.
|
||||
|
||||
Returns:
|
||||
One file path per line.
|
||||
"""
|
||||
return "\n".join(str(r.file_path) for r in results)
|
||||
|
||||
def _format_count(self, results: list[FileSearchResult]) -> str:
|
||||
"""Format results showing match counts per file.
|
||||
|
||||
Args:
|
||||
results: List of file search results.
|
||||
|
||||
Returns:
|
||||
Filepath and count per line.
|
||||
"""
|
||||
return "\n".join(f"{r.file_path}: {r.match_count}" for r in results)
|
||||
450
lib/crewai-tools/tests/tools/grep_tool_test.py
Normal file
450
lib/crewai-tools/tests/tools/grep_tool_test.py
Normal file
@@ -0,0 +1,450 @@
|
||||
"""Unit tests for GrepTool."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from pydantic import ValidationError
|
||||
|
||||
from crewai_tools import GrepTool
|
||||
from crewai_tools.tools.grep_tool.grep_tool import (
|
||||
MAX_CONTEXT_LINES,
|
||||
MAX_REGEX_LENGTH,
|
||||
GrepToolSchema,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
def sample_dir(tmp_path: Path) -> Path:
    """Create a temp directory with sample files for testing."""
    # src/main.py — functions and a class for def/class pattern tests.
    src = tmp_path / "src"
    src.mkdir()
    (src / "main.py").write_text(
        "def hello():\n"
        "    print('Hello, world!')\n"
        "\n"
        "def goodbye():\n"
        "    print('Goodbye, world!')\n"
        "\n"
        "class MyClass:\n"
        "    pass\n"
    )

    # src/utils.py — second Python file for multi-file assertions.
    (src / "utils.py").write_text(
        "import os\n"
        "\n"
        "def helper():\n"
        "    return os.getcwd()\n"
        "\n"
        "CONSTANT = 42\n"
    )

    # docs/readme.md — non-Python content for glob filtering tests.
    docs = tmp_path / "docs"
    docs.mkdir()
    (docs / "readme.md").write_text(
        "# Project\n"
        "\n"
        "This is a sample project.\n"
        "It has multiple files.\n"
    )

    # data/binary.bin — NUL bytes mark it as binary so it must be skipped.
    data = tmp_path / "data"
    data.mkdir()
    (data / "binary.bin").write_bytes(b"\x00\x01\x02\x03\x04binary content")

    # empty.txt — zero-length file edge case.
    (tmp_path / "empty.txt").write_text("")

    # .git/config (should be skipped)
    git_dir = tmp_path / ".git"
    git_dir.mkdir()
    (git_dir / "config").write_text("[core]\n repositoryformatversion = 0\n")

    return tmp_path
|
||||
|
||||
|
||||
class TestGrepTool:
    """Tests for GrepTool."""

    def setup_method(self) -> None:
        """Set up test fixtures.

        We use allow_unrestricted_paths=True so that tests using pytest's
        tmp_path (which lives outside the working directory) are not rejected
        by the path-restriction guard.
        """
        self.tool = GrepTool(allow_unrestricted_paths=True)

    def test_tool_metadata(self) -> None:
        """Test tool has correct name and description."""
        assert self.tool.name == "Search file contents"
        assert "search" in self.tool.description.lower() or "Search" in self.tool.description

    def test_args_schema(self) -> None:
        """Test that args_schema has correct fields and defaults."""
        schema = self.tool.args_schema
        fields = schema.model_fields

        assert "pattern" in fields
        assert fields["pattern"].is_required()

        assert "path" in fields
        assert not fields["path"].is_required()

        assert "glob_pattern" in fields
        assert not fields["glob_pattern"].is_required()

        assert "output_mode" in fields
        assert not fields["output_mode"].is_required()

        assert "case_insensitive" in fields
        assert not fields["case_insensitive"].is_required()

        assert "context_lines" in fields
        assert not fields["context_lines"].is_required()

        assert "include_line_numbers" in fields
        assert not fields["include_line_numbers"].is_required()

    def test_basic_pattern_match(self, sample_dir: Path) -> None:
        """Test simple string pattern found in output."""
        result = self.tool._run(pattern="Hello", path=str(sample_dir))
        assert "Hello" in result

    def test_regex_pattern(self, sample_dir: Path) -> None:
        """Test regex pattern matches function definitions."""
        result = self.tool._run(pattern=r"def\s+\w+", path=str(sample_dir))
        assert "def hello" in result
        assert "def goodbye" in result
        assert "def helper" in result

    def test_case_sensitive_default(self, sample_dir: Path) -> None:
        """Test that search is case-sensitive by default."""
        result = self.tool._run(pattern="hello", path=str(sample_dir))
        # "hello" (lowercase) appears in "def hello():" but not in "Hello, world!"
        assert "hello" in result
        # Verify it found the function definition line
        assert "def hello" in result

    def test_case_insensitive(self, sample_dir: Path) -> None:
        """Test case-insensitive matching."""
        result = self.tool._run(
            pattern="hello", path=str(sample_dir), case_insensitive=True
        )
        # Should match both "def hello():" and "Hello, world!"
        assert "hello" in result.lower()
        assert "Hello" in result

    def test_output_mode_content(self, sample_dir: Path) -> None:
        """Test content output mode shows file paths, line numbers, and text."""
        result = self.tool._run(
            pattern="CONSTANT", path=str(sample_dir), output_mode="content"
        )
        assert "utils.py" in result
        assert "CONSTANT" in result
        # Should have line numbers by default
        assert ": " in result

    def test_output_mode_files_with_matches(self, sample_dir: Path) -> None:
        """Test files_with_matches output mode shows only file paths."""
        result = self.tool._run(
            pattern="def", path=str(sample_dir), output_mode="files_with_matches"
        )
        assert "main.py" in result
        assert "utils.py" in result
        # Should not contain line content
        assert "print" not in result

    def test_output_mode_count(self, sample_dir: Path) -> None:
        """Test count output mode shows filepath: N format."""
        result = self.tool._run(
            pattern="def", path=str(sample_dir), output_mode="count"
        )
        # main.py has 2 def lines, utils.py has 1
        assert "main.py: 2" in result
        assert "utils.py: 1" in result

    def test_context_lines(self, sample_dir: Path) -> None:
        """Test surrounding context lines are included."""
        result = self.tool._run(
            pattern="CONSTANT", path=str(sample_dir), context_lines=2
        )
        # Two lines before CONSTANT = 42 is "    return os.getcwd()"
        assert "return os.getcwd()" in result
        assert "CONSTANT" in result

    def test_line_numbers_disabled(self, sample_dir: Path) -> None:
        """Test output without line number prefixes."""
        result = self.tool._run(
            pattern="CONSTANT",
            path=str(sample_dir),
            include_line_numbers=False,
        )
        assert "CONSTANT = 42" in result
        # Verify no line number prefix (e.g., "6: ")
        for line in result.strip().split("\n"):
            if "CONSTANT" in line:
                assert not line[0].isdigit() or ": " not in line

    def test_glob_pattern_filtering(self, sample_dir: Path) -> None:
        """Test glob pattern filters to specific file types."""
        result = self.tool._run(
            pattern="project",
            path=str(sample_dir),
            glob_pattern="*.py",
            case_insensitive=True,
        )
        # "project" appears in readme.md but not in .py files
        assert "No matches found" in result

    def test_search_single_file(self, sample_dir: Path) -> None:
        """Test searching a single file by path."""
        file_path = str(sample_dir / "src" / "main.py")
        result = self.tool._run(pattern="def", path=file_path)
        assert "def hello" in result
        assert "def goodbye" in result
        # Should not include results from other files
        assert "helper" not in result

    def test_path_not_found(self) -> None:
        """Test error message when a relative path doesn't exist."""
        result = self.tool._run(pattern="test", path="totally_nonexistent_subdir")
        assert "Error" in result
        assert "does not exist" in result

    def test_invalid_regex(self, sample_dir: Path) -> None:
        """Test error message for invalid regex patterns."""
        result = self.tool._run(pattern="[invalid", path=str(sample_dir))
        assert "Error" in result
        assert "Invalid regex" in result

    def test_binary_files_skipped(self, sample_dir: Path) -> None:
        """Test binary files are not included in results."""
        result = self.tool._run(pattern="binary", path=str(sample_dir))
        # binary.bin has null bytes so it should be skipped
        assert "binary.bin" not in result

    def test_no_matches_found(self, sample_dir: Path) -> None:
        """Test message when no matches are found."""
        result = self.tool._run(
            pattern="zzz_nonexistent_pattern_zzz", path=str(sample_dir)
        )
        assert "No matches found" in result

    def test_hidden_dirs_skipped(self, sample_dir: Path) -> None:
        """Test that .git/ directory contents are not searched."""
        result = self.tool._run(pattern="repositoryformatversion", path=str(sample_dir))
        assert "No matches found" in result

    def test_empty_file(self, sample_dir: Path) -> None:
        """Test searching an empty file doesn't crash."""
        result = self.tool._run(
            pattern="anything", path=str(sample_dir / "empty.txt")
        )
        assert "No matches found" in result

    def test_run_with_kwargs(self, sample_dir: Path) -> None:
        """Test _run ignores extra kwargs."""
        result = self.tool._run(
            pattern="Hello", path=str(sample_dir), extra_arg="ignored"
        )
        assert "Hello" in result
||||
|
||||
|
||||
class TestPathRestriction:
    """Tests for path traversal prevention and allow_unrestricted_paths."""

    def test_absolute_path_outside_cwd_blocked(self, tmp_path: Path) -> None:
        """An absolute path outside cwd is rejected by default."""
        tool = GrepTool()
        # tmp_path is almost certainly not under os.getcwd()
        result = tool._run(pattern="anything", path=str(tmp_path))
        assert "Error" in result
        assert "outside the working directory" in result

    def test_relative_traversal_blocked(self, sample_dir: Path) -> None:
        """A relative path with ../ that escapes cwd is rejected."""
        tool = GrepTool()
        result = tool._run(pattern="anything", path="../../etc")
        assert "Error" in result
        assert "outside the working directory" in result

    def test_relative_path_within_cwd_allowed(self) -> None:
        """A relative path that stays inside cwd works fine."""
        tool = GrepTool()
        # "." is always within cwd
        result = tool._run(pattern="zzz_will_not_match_anything_zzz", path=".")
        # Should not get a traversal error — either matches or "No matches found"
        assert "outside the working directory" not in result

    def test_allow_unrestricted_paths_bypasses_check(self, tmp_path: Path) -> None:
        """With allow_unrestricted_paths=True, absolute paths outside cwd are allowed."""
        # Write a searchable file in tmp_path
        (tmp_path / "hello.txt").write_text("unrestricted search target\n")
        tool = GrepTool(allow_unrestricted_paths=True)
        result = tool._run(pattern="unrestricted", path=str(tmp_path))
        assert "unrestricted search target" in result

    def test_allow_unrestricted_defaults_false(self) -> None:
        """The flag defaults to False."""
        tool = GrepTool()
        assert tool.allow_unrestricted_paths is False

    def test_error_message_includes_hint(self, tmp_path: Path) -> None:
        """The traversal error tells the user how to opt in."""
        tool = GrepTool()
        result = tool._run(pattern="x", path=str(tmp_path))
        assert "GrepTool(allow_unrestricted_paths=True)" in result
|
||||
|
||||
|
||||
class TestReDoSGuards:
    """Tests for regex denial-of-service mitigations."""

    def test_pattern_length_rejected(self, sample_dir: Path) -> None:
        """Patterns exceeding MAX_REGEX_LENGTH are rejected before compilation."""
        tool = GrepTool(allow_unrestricted_paths=True)
        long_pattern = "a" * (MAX_REGEX_LENGTH + 1)
        result = tool._run(pattern=long_pattern, path=str(sample_dir))
        assert "Error" in result
        assert "Pattern too long" in result

    def test_pattern_at_max_length_accepted(self, sample_dir: Path) -> None:
        """A pattern exactly at MAX_REGEX_LENGTH is allowed (boundary check)."""
        tool = GrepTool(allow_unrestricted_paths=True)
        exact_pattern = "a" * MAX_REGEX_LENGTH
        result = tool._run(pattern=exact_pattern, path=str(sample_dir))
        # Should not get a length error — either matches or "No matches found"
        assert "Pattern too long" not in result

    def test_safe_search_returns_match(self) -> None:
        """_safe_search returns a match object for a normal pattern."""
        # Plain local import instead of the opaque __import__("re") call.
        import re

        compiled = re.compile(r"hello")
        match = GrepTool._safe_search(compiled, "say hello world")
        assert match is not None
        assert match.group() == "hello"

    def test_safe_search_returns_none_on_no_match(self) -> None:
        """_safe_search returns None when the pattern doesn't match."""
        import re

        compiled = re.compile(r"zzz")
        match = GrepTool._safe_search(compiled, "hello world")
        assert match is None
||||
|
||||
|
||||
class TestBraceExpansion:
|
||||
"""Tests for glob brace expansion ({a,b} syntax)."""
|
||||
|
||||
def test_expand_simple_brace(self) -> None:
|
||||
"""*.{py,txt} expands to ['*.py', '*.txt']."""
|
||||
result = GrepTool._expand_brace_pattern("*.{py,txt}")
|
||||
assert result == ["*.py", "*.txt"]
|
||||
|
||||
def test_expand_three_alternatives(self) -> None:
|
||||
"""*.{py,txt,md} expands to three patterns."""
|
||||
result = GrepTool._expand_brace_pattern("*.{py,txt,md}")
|
||||
assert result == ["*.py", "*.txt", "*.md"]
|
||||
|
||||
def test_expand_no_braces_passthrough(self) -> None:
|
||||
"""A pattern without braces is returned as a single-element list."""
|
||||
result = GrepTool._expand_brace_pattern("*.py")
|
||||
assert result == ["*.py"]
|
||||
|
||||
def test_expand_strips_whitespace(self) -> None:
|
||||
"""Whitespace around alternatives inside braces is stripped."""
|
||||
result = GrepTool._expand_brace_pattern("*.{ py , txt }")
|
||||
assert result == ["*.py", "*.txt"]
|
||||
|
||||
def test_expand_prefix_and_suffix(self) -> None:
|
||||
"""Prefix and suffix around the braces are preserved."""
|
||||
result = GrepTool._expand_brace_pattern("src/*.{py,pyi}.bak")
|
||||
assert result == ["src/*.py.bak", "src/*.pyi.bak"]
|
||||
|
||||
def test_brace_glob_end_to_end(self, tmp_path: Path) -> None:
|
||||
"""Brace expansion works end-to-end with _collect_files."""
|
||||
(tmp_path / "a.py").write_text("match_me\n")
|
||||
(tmp_path / "b.txt").write_text("match_me\n")
|
||||
(tmp_path / "c.md").write_text("match_me\n")
|
||||
|
||||
tool = GrepTool(allow_unrestricted_paths=True)
|
||||
result = tool._run(
|
||||
pattern="match_me",
|
||||
path=str(tmp_path),
|
||||
glob_pattern="*.{py,txt}",
|
||||
)
|
||||
assert "a.py" in result
|
||||
assert "b.txt" in result
|
||||
# .md should NOT be included
|
||||
assert "c.md" not in result
|
||||
|
||||
def test_brace_glob_no_duplicates(self, tmp_path: Path) -> None:
|
||||
"""Files are not reported twice when they match multiple expanded patterns."""
|
||||
(tmp_path / "x.py").write_text("unique_content\n")
|
||||
|
||||
tool = GrepTool(allow_unrestricted_paths=True)
|
||||
result = tool._run(
|
||||
pattern="unique_content",
|
||||
path=str(tmp_path),
|
||||
glob_pattern="*.{py,py}",
|
||||
output_mode="count",
|
||||
)
|
||||
# Should appear exactly once
|
||||
assert result.count("x.py") == 1
|
||||
|
||||
|
||||
class TestSensitiveFileProtection:
|
||||
"""Tests for sensitive file exclusion (secrets leakage prevention)."""
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"name",
|
||||
[".env", ".env.local", ".netrc", ".npmrc", "secrets.json", "server.pem"],
|
||||
)
|
||||
def test_sensitive_files_excluded(self, tmp_path: Path, name: str) -> None:
|
||||
"""Sensitive files are skipped even if they contain matches."""
|
||||
(tmp_path / name).write_text("MATCH_ME\n")
|
||||
tool = GrepTool(allow_unrestricted_paths=True)
|
||||
result = tool._run(pattern="MATCH_ME", path=str(tmp_path))
|
||||
assert "No matches found" in result
|
||||
|
||||
def test_sensitive_file_blocked_by_direct_path(self, tmp_path: Path) -> None:
|
||||
"""A .env passed as the explicit path argument is still blocked."""
|
||||
env = tmp_path / ".env"
|
||||
env.write_text("SECRET=abc\n")
|
||||
tool = GrepTool(allow_unrestricted_paths=True)
|
||||
result = tool._run(pattern="SECRET", path=str(env))
|
||||
assert "No matches found" in result
|
||||
|
||||
|
||||
class TestFileSizeLimit:
|
||||
"""Tests for max_file_size_bytes guard."""
|
||||
|
||||
def test_large_file_skipped(self, tmp_path: Path) -> None:
|
||||
"""Files over max_file_size_bytes are skipped."""
|
||||
(tmp_path / "big.txt").write_text("needle\n" * 100)
|
||||
tool = GrepTool(allow_unrestricted_paths=True, max_file_size_bytes=50)
|
||||
result = tool._run(pattern="needle", path=str(tmp_path))
|
||||
assert "No matches found" in result
|
||||
|
||||
def test_large_file_searched_with_raised_limit(self, tmp_path: Path) -> None:
|
||||
"""Raising the limit lets the same file be searched."""
|
||||
(tmp_path / "big.txt").write_text("needle\n" * 100)
|
||||
tool = GrepTool(allow_unrestricted_paths=True, max_file_size_bytes=50_000)
|
||||
result = tool._run(pattern="needle", path=str(tmp_path))
|
||||
assert "needle" in result
|
||||
|
||||
|
||||
class TestContextLinesUpperBound:
|
||||
"""Tests for context_lines validation bounds."""
|
||||
|
||||
def test_negative_rejected(self) -> None:
|
||||
"""context_lines < 0 is rejected by Pydantic."""
|
||||
with pytest.raises(ValidationError):
|
||||
GrepToolSchema(pattern="x", context_lines=-1)
|
||||
|
||||
def test_over_max_rejected(self) -> None:
|
||||
"""context_lines > MAX_CONTEXT_LINES is rejected by Pydantic."""
|
||||
with pytest.raises(ValidationError):
|
||||
GrepToolSchema(pattern="x", context_lines=MAX_CONTEXT_LINES + 1)
|
||||
@@ -5664,10 +5664,6 @@
|
||||
"title": "Bucket Name",
|
||||
"type": "string"
|
||||
},
|
||||
"cluster": {
|
||||
"description": "An instance of the Couchbase Cluster connected to the desired Couchbase server.",
|
||||
"title": "Cluster"
|
||||
},
|
||||
"collection_name": {
|
||||
"description": "The name of the Couchbase collection to search",
|
||||
"title": "Collection Name",
|
||||
@@ -5716,7 +5712,6 @@
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"cluster",
|
||||
"collection_name",
|
||||
"scope_name",
|
||||
"bucket_name",
|
||||
@@ -10155,6 +10150,141 @@
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
{
|
||||
"description": "A tool that searches file contents on disk using regex patterns. Recursively searches files in a directory for matching lines. Returns matching content with line numbers, file paths only, or match counts.",
|
||||
"env_vars": [],
|
||||
"humanized_name": "Search file contents",
|
||||
"init_params_schema": {
|
||||
"$defs": {
|
||||
"EnvVar": {
|
||||
"properties": {
|
||||
"default": {
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"default": null,
|
||||
"title": "Default"
|
||||
},
|
||||
"description": {
|
||||
"title": "Description",
|
||||
"type": "string"
|
||||
},
|
||||
"name": {
|
||||
"title": "Name",
|
||||
"type": "string"
|
||||
},
|
||||
"required": {
|
||||
"default": true,
|
||||
"title": "Required",
|
||||
"type": "boolean"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"name",
|
||||
"description"
|
||||
],
|
||||
"title": "EnvVar",
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"description": "Tool for searching file contents on disk using regex patterns.\n\nRecursively searches files in a directory for lines matching a regex pattern.\nSupports glob filtering, context lines, and multiple output modes.\n\nExample:\n >>> tool = GrepTool()\n >>> result = tool.run(pattern=\"def.*main\", path=\"src\")\n >>> result = tool.run(\n ... pattern=\"TODO\",\n ... glob_pattern=\"*.py\",\n ... context_lines=2,\n ... )\n\n To search any path on the filesystem (opt-in):\n >>> tool = GrepTool(allow_unrestricted_paths=True)\n >>> result = tool.run(pattern=\"error\", path=\"/var/log/app\")",
|
||||
"properties": {
|
||||
"allow_unrestricted_paths": {
|
||||
"default": false,
|
||||
"description": "When False (default), searches are restricted to the current working directory. Set to True to allow searching any path on the filesystem.",
|
||||
"title": "Allow Unrestricted Paths",
|
||||
"type": "boolean"
|
||||
},
|
||||
"max_file_size_bytes": {
|
||||
"default": 10485760,
|
||||
"description": "Maximum file size in bytes to search. Files larger than this are skipped. Defaults to 10 MB.",
|
||||
"title": "Max File Size Bytes",
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"title": "GrepTool",
|
||||
"type": "object"
|
||||
},
|
||||
"name": "GrepTool",
|
||||
"package_dependencies": [],
|
||||
"run_params_schema": {
|
||||
"description": "Schema for grep tool arguments.",
|
||||
"properties": {
|
||||
"case_insensitive": {
|
||||
"default": false,
|
||||
"description": "Whether to perform case-insensitive matching",
|
||||
"title": "Case Insensitive",
|
||||
"type": "boolean"
|
||||
},
|
||||
"context_lines": {
|
||||
"default": 0,
|
||||
"description": "Number of lines to show before and after each match (0-10)",
|
||||
"maximum": 10,
|
||||
"minimum": 0,
|
||||
"title": "Context Lines",
|
||||
"type": "integer"
|
||||
},
|
||||
"glob_pattern": {
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"default": null,
|
||||
"description": "Glob pattern to filter files (e.g. '*.py'). Supports brace expansion (e.g. '*.{ts,tsx}').",
|
||||
"title": "Glob Pattern"
|
||||
},
|
||||
"include_line_numbers": {
|
||||
"default": true,
|
||||
"description": "Whether to prefix matching lines with line numbers",
|
||||
"title": "Include Line Numbers",
|
||||
"type": "boolean"
|
||||
},
|
||||
"output_mode": {
|
||||
"default": "content",
|
||||
"description": "Output mode: 'content' shows matching lines, 'files_with_matches' shows only file paths, 'count' shows match counts per file",
|
||||
"enum": [
|
||||
"content",
|
||||
"files_with_matches",
|
||||
"count"
|
||||
],
|
||||
"title": "Output Mode",
|
||||
"type": "string"
|
||||
},
|
||||
"path": {
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"default": null,
|
||||
"description": "File or directory to search in. Defaults to current working directory.",
|
||||
"title": "Path"
|
||||
},
|
||||
"pattern": {
|
||||
"description": "Regex pattern to search for in file contents",
|
||||
"title": "Pattern",
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"pattern"
|
||||
],
|
||||
"title": "GrepToolSchema",
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
{
|
||||
"description": "Scrape or crawl a website using Hyperbrowser and return the contents in properly formatted markdown or html",
|
||||
"env_vars": [
|
||||
@@ -14460,13 +14590,9 @@
|
||||
"properties": {
|
||||
"config": {
|
||||
"$ref": "#/$defs/OxylabsAmazonProductScraperConfig"
|
||||
},
|
||||
"oxylabs_api": {
|
||||
"title": "Oxylabs Api"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"oxylabs_api",
|
||||
"config"
|
||||
],
|
||||
"title": "OxylabsAmazonProductScraperTool",
|
||||
@@ -14689,13 +14815,9 @@
|
||||
"properties": {
|
||||
"config": {
|
||||
"$ref": "#/$defs/OxylabsAmazonSearchScraperConfig"
|
||||
},
|
||||
"oxylabs_api": {
|
||||
"title": "Oxylabs Api"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"oxylabs_api",
|
||||
"config"
|
||||
],
|
||||
"title": "OxylabsAmazonSearchScraperTool",
|
||||
@@ -14931,13 +15053,9 @@
|
||||
"properties": {
|
||||
"config": {
|
||||
"$ref": "#/$defs/OxylabsGoogleSearchScraperConfig"
|
||||
},
|
||||
"oxylabs_api": {
|
||||
"title": "Oxylabs Api"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"oxylabs_api",
|
||||
"config"
|
||||
],
|
||||
"title": "OxylabsGoogleSearchScraperTool",
|
||||
@@ -15121,13 +15239,9 @@
|
||||
"properties": {
|
||||
"config": {
|
||||
"$ref": "#/$defs/OxylabsUniversalScraperConfig"
|
||||
},
|
||||
"oxylabs_api": {
|
||||
"title": "Oxylabs Api"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"oxylabs_api",
|
||||
"config"
|
||||
],
|
||||
"title": "OxylabsUniversalScraperTool",
|
||||
@@ -23229,26 +23343,6 @@
|
||||
"description": "The Tavily API key. If not provided, it will be loaded from the environment variable TAVILY_API_KEY.",
|
||||
"title": "Api Key"
|
||||
},
|
||||
"async_client": {
|
||||
"anyOf": [
|
||||
{},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"default": null,
|
||||
"title": "Async Client"
|
||||
},
|
||||
"client": {
|
||||
"anyOf": [
|
||||
{},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"default": null,
|
||||
"title": "Client"
|
||||
},
|
||||
"extract_depth": {
|
||||
"default": "basic",
|
||||
"description": "The depth of extraction. 'basic' for basic extraction, 'advanced' for advanced extraction.",
|
||||
@@ -23384,26 +23478,6 @@
|
||||
"description": "The Tavily API key. If not provided, it will be loaded from the environment variable TAVILY_API_KEY.",
|
||||
"title": "Api Key"
|
||||
},
|
||||
"async_client": {
|
||||
"anyOf": [
|
||||
{},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"default": null,
|
||||
"title": "Async Client"
|
||||
},
|
||||
"client": {
|
||||
"anyOf": [
|
||||
{},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"default": null,
|
||||
"title": "Client"
|
||||
},
|
||||
"days": {
|
||||
"default": 7,
|
||||
"description": "The number of days to search back.",
|
||||
|
||||
Reference in New Issue
Block a user