Compare commits

...

19 Commits

Author SHA1 Message Date
github-actions[bot]
2c78e60f56 chore: update tool specifications 2026-03-10 17:32:23 +00:00
Lorenze Jay
8e336a476f Merge branch 'main' into lorenze/feat/grep-tool 2026-03-10 10:31:04 -07:00
github-actions[bot]
2d0e81c10d chore: update tool specifications 2026-02-17 22:35:58 +00:00
Lorenze Jay
c8dd6c006c Merge branch 'main' into lorenze/feat/grep-tool 2026-02-17 14:34:36 -08:00
lorenzejay
73f44c878d Merge branch 'lorenze/feat/grep-tool' of github.com:crewAIInc/crewAI into lorenze/feat/grep-tool 2026-02-12 10:29:58 -08:00
lorenzejay
364143a682 fix test 2026-02-12 10:29:46 -08:00
github-actions[bot]
f894d8cf9d chore: update tool specifications 2026-02-12 18:29:36 +00:00
lorenzejay
1f0265781a Merge branch 'lorenze/feat/grep-tool' of github.com:crewAIInc/crewAI into lorenze/feat/grep-tool 2026-02-12 10:28:16 -08:00
lorenzejay
9fae6c0adf feat: enhance GrepTool with sensitive file exclusion and file size limit
- Added MAX_CONTEXT_LINES to define the upper limit for context lines shown in search results.
- Introduced MAX_FILE_SIZE_BYTES to skip files larger than 10 MB during searches.
- Implemented logic to exclude sensitive files (e.g., .env, .netrc) from search results to prevent accidental leakage of credentials.
- Updated tests to validate sensitive file exclusion and file size limits, ensuring robustness in handling sensitive content.
2026-02-12 10:27:24 -08:00
Lorenze Jay
dea2e1e715 Merge branch 'main' into lorenze/feat/grep-tool 2026-02-12 09:24:15 -08:00
github-actions[bot]
b97fc83656 chore: update tool specifications 2026-02-12 04:47:03 +00:00
lorenzejay
925ed7850e linted 2026-02-11 20:45:40 -08:00
lorenzejay
ec2b6a0287 feat: enhance GrepTool with regex length limit, path restrictions, and brace expansion support
- Added MAX_REGEX_LENGTH to limit regex pattern length and prevent ReDoS.
- Introduced allow_unrestricted_paths option to enable searching outside the current working directory.
- Implemented brace expansion for glob patterns to support multiple file types.
- Enhanced error handling for path traversal and regex compilation.
- Updated tests to cover new features and ensure robustness.
2026-02-11 20:44:46 -08:00
Lorenze Jay
25835ca795 Merge branch 'main' into lorenze/feat/grep-tool 2026-02-11 14:23:35 -08:00
Lorenze Jay
e65940816b Merge branch 'main' into lorenze/feat/grep-tool 2026-02-09 11:28:49 -08:00
Lorenze Jay
ad2435f5c1 Merge branch 'main' into lorenze/feat/grep-tool 2026-02-05 12:02:33 -08:00
github-actions[bot]
c9971a7418 chore: update tool specifications 2026-02-04 19:52:01 +00:00
lorenzejay
f04bedc9ab moved to tools 2026-02-04 11:50:43 -08:00
Lorenze Jay
5a14007511 native support for grep 2026-02-04 10:28:35 -08:00
6 changed files with 1134 additions and 61 deletions

View File

@@ -88,6 +88,7 @@ from crewai_tools.tools.generate_crewai_automation_tool.generate_crewai_automati
GenerateCrewaiAutomationTool,
)
from crewai_tools.tools.github_search_tool.github_search_tool import GithubSearchTool
from crewai_tools.tools.grep_tool.grep_tool import GrepTool
from crewai_tools.tools.hyperbrowser_load_tool.hyperbrowser_load_tool import (
HyperbrowserLoadTool,
)
@@ -248,6 +249,7 @@ __all__ = [
"FirecrawlSearchTool",
"GenerateCrewaiAutomationTool",
"GithubSearchTool",
"GrepTool",
"HyperbrowserLoadTool",
"InvokeCrewAIAutomationTool",
"JSONSearchTool",

View File

@@ -77,6 +77,7 @@ from crewai_tools.tools.generate_crewai_automation_tool.generate_crewai_automati
GenerateCrewaiAutomationTool,
)
from crewai_tools.tools.github_search_tool.github_search_tool import GithubSearchTool
from crewai_tools.tools.grep_tool.grep_tool import GrepTool
from crewai_tools.tools.hyperbrowser_load_tool.hyperbrowser_load_tool import (
HyperbrowserLoadTool,
)
@@ -232,6 +233,7 @@ __all__ = [
"FirecrawlSearchTool",
"GenerateCrewaiAutomationTool",
"GithubSearchTool",
"GrepTool",
"HyperbrowserLoadTool",
"InvokeCrewAIAutomationTool",
"JSONSearchTool",

View File

@@ -0,0 +1,3 @@
from crewai_tools.tools.grep_tool.grep_tool import GrepTool
__all__ = ["GrepTool"]

View File

@@ -0,0 +1,542 @@
"""Tool for searching file contents on disk using regex patterns."""
from __future__ import annotations
from dataclasses import dataclass, field
from itertools import chain
import os
from pathlib import Path
import re
import signal
import sys
from typing import Literal
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
# Output and resource limits that bound a single search invocation.
MAX_OUTPUT_CHARS = 50_000  # total characters returned before truncation
MAX_FILES = 10_000  # maximum number of files collected per search
MAX_MATCHES_PER_FILE = 200  # matches recorded per file before stopping early
MAX_LINE_LENGTH = 500  # characters of a line shown before "..." truncation
BINARY_CHECK_SIZE = 8192  # bytes sniffed for null bytes in binary detection
MAX_REGEX_LENGTH = 1_000  # pattern length guard against ReDoS
REGEX_MATCH_TIMEOUT_SECONDS = 5  # per-line SIGALRM timeout (Unix only)
MAX_CONTEXT_LINES = 10  # upper bound accepted for context_lines
MAX_FILE_SIZE_BYTES = 10 * 1024 * 1024  # 10 MB

# Directory names that are never descended into (VCS metadata, caches,
# virtual environments).
SKIP_DIRS = frozenset(
    {
        ".git",
        "__pycache__",
        "node_modules",
        ".venv",
        "venv",
        ".tox",
        ".mypy_cache",
        ".pytest_cache",
    }
)

# File names that may contain secrets or credentials — always excluded from
# search results to prevent accidental sensitive-content leakage. Entries
# containing "/" (e.g. ".aws/credentials") are also matched against trailing
# path components — see GrepTool._is_sensitive_file.
SENSITIVE_FILE_NAMES = frozenset(
    {
        ".env",
        ".env.local",
        ".env.development",
        ".env.production",
        ".env.staging",
        ".env.test",
        ".netrc",
        ".npmrc",
        ".pypirc",
        ".docker/config.json",
        ".aws/credentials",
        ".ssh/id_rsa",
        ".ssh/id_ed25519",
        ".ssh/id_ecdsa",
        ".ssh/id_dsa",
        "credentials.json",
        "service-account.json",
        "secrets.yaml",
        "secrets.yml",
        "secrets.json",
    }
)

# Key/certificate extensions that indicate sensitive content; matched with
# str.endswith against the full file name (e.g. "server.pem" matches,
# "pem.txt" does not).
SENSITIVE_FILE_PATTERNS = (
    ".pem",
    ".key",
    ".p12",
    ".pfx",
    ".jks",
    ".keystore",
)
@dataclass
class MatchLine:
    """A single line from a search result."""

    line_number: int  # 1-indexed line number within the file
    text: str  # line content (truncated to MAX_LINE_LENGTH in _search_file)
    is_match: bool  # True for match, False for context line
@dataclass
class FileSearchResult:
    """Search results for a single file."""

    file_path: Path  # path of the searched file
    # Groups of contiguous match/context lines ("--"-separated in output).
    matches: list[list[MatchLine]] = field(default_factory=list)
    match_count: int = 0  # number of matching lines (capped at MAX_MATCHES_PER_FILE)
class GrepToolSchema(BaseModel):
    """Schema for grep tool arguments.

    Validation (required pattern, context_lines bounds) is enforced by
    Pydantic when the tool is invoked through its public run interface.
    """

    pattern: str = Field(
        ..., description="Regex pattern to search for in file contents"
    )
    path: str | None = Field(
        default=None,
        description="File or directory to search in. Defaults to current working directory.",
    )
    glob_pattern: str | None = Field(
        default=None,
        description="Glob pattern to filter files (e.g. '*.py'). Supports brace expansion (e.g. '*.{ts,tsx}').",
    )
    output_mode: Literal["content", "files_with_matches", "count"] = Field(
        default="content",
        description="Output mode: 'content' shows matching lines, 'files_with_matches' shows only file paths, 'count' shows match counts per file",
    )
    case_insensitive: bool = Field(
        default=False,
        description="Whether to perform case-insensitive matching",
    )
    # ge/le bounds reject negative or excessively large context windows.
    context_lines: int = Field(
        default=0,
        ge=0,
        le=MAX_CONTEXT_LINES,
        description=f"Number of lines to show before and after each match (0-{MAX_CONTEXT_LINES})",
    )
    include_line_numbers: bool = Field(
        default=True,
        description="Whether to prefix matching lines with line numbers",
    )
class GrepTool(BaseTool):
    """Tool for searching file contents on disk using regex patterns.

    Recursively searches files in a directory for lines matching a regex pattern.
    Supports glob filtering, context lines, and multiple output modes.

    Example:
        >>> tool = GrepTool()
        >>> result = tool.run(pattern="def.*main", path="src")
        >>> result = tool.run(
        ...     pattern="TODO",
        ...     glob_pattern="*.py",
        ...     context_lines=2,
        ... )

    To search any path on the filesystem (opt-in):
        >>> tool = GrepTool(allow_unrestricted_paths=True)
        >>> result = tool.run(pattern="error", path="/var/log/app")
    """

    name: str = "Search file contents"
    description: str = (
        "A tool that searches file contents on disk using regex patterns. "
        "Recursively searches files in a directory for matching lines. "
        "Returns matching content with line numbers, file paths only, or match counts."
    )
    args_schema: type[BaseModel] = GrepToolSchema
    # Opt-in escape hatch for the cwd sandbox enforced in _run().
    allow_unrestricted_paths: bool = Field(
        default=False,
        description=(
            "When False (default), searches are restricted to the current working "
            "directory. Set to True to allow searching any path on the filesystem."
        ),
    )
    # Per-instance override of the module-level MAX_FILE_SIZE_BYTES default.
    max_file_size_bytes: int = Field(
        default=MAX_FILE_SIZE_BYTES,
        description=(
            "Maximum file size in bytes to search. Files larger than this are "
            "skipped. Defaults to 10 MB."
        ),
    )
def _run(
    self,
    pattern: str,
    path: str | None = None,
    glob_pattern: str | None = None,
    output_mode: Literal["content", "files_with_matches", "count"] = "content",
    case_insensitive: bool = False,
    context_lines: int = 0,
    include_line_numbers: bool = True,
    **kwargs: object,
) -> str:
    """Search files for a regex pattern.

    Args:
        pattern: Regex pattern to search for.
        path: File or directory to search. Defaults to cwd.
        glob_pattern: Glob pattern to filter files.
        output_mode: What to return.
        case_insensitive: Case-insensitive matching.
        context_lines: Lines of context around matches.
        include_line_numbers: Prefix lines with line numbers.

    Returns:
        Formatted search results as a string. All failure cases (bad path,
        bad regex, over-long pattern) are reported as "Error: ..." strings
        rather than raised, so the LLM caller always gets readable output.
    """
    # Resolve search path — constrained to cwd unless unrestricted
    cwd = Path(os.getcwd()).resolve()
    if path:
        candidate = Path(path)
        if candidate.is_absolute():
            search_path = candidate.resolve()
        else:
            # Relative paths are resolved against cwd, which also collapses
            # any "../" segments before the containment check below.
            search_path = (cwd / candidate).resolve()
        # Prevent traversal outside the working directory (unless opted in)
        if not self.allow_unrestricted_paths:
            try:
                search_path.relative_to(cwd)
            except ValueError:
                return (
                    f"Error: Path '{path}' is outside the working directory. "
                    "Initialize with GrepTool(allow_unrestricted_paths=True) to allow this."
                )
    else:
        search_path = cwd
    if not search_path.exists():
        return f"Error: Path '{search_path}' does not exist."
    # Compile regex with length guard to mitigate ReDoS
    if len(pattern) > MAX_REGEX_LENGTH:
        return f"Error: Pattern too long ({len(pattern)} chars). Maximum is {MAX_REGEX_LENGTH}."
    flags = re.IGNORECASE if case_insensitive else 0
    try:
        compiled = re.compile(pattern, flags)
    except re.error as e:
        return f"Error: Invalid regex pattern '{pattern}': {e}"
    # Collect files
    files = self._collect_files(search_path, glob_pattern)
    # Search each file
    results: list[FileSearchResult] = []
    for file_path in files:
        result = self._search_file(file_path, compiled, context_lines)
        if result is not None:
            results.append(result)
    if not results:
        return "No matches found."
    # Format output
    if output_mode == "files_with_matches":
        output = self._format_files_with_matches(results)
    elif output_mode == "count":
        output = self._format_count(results)
    else:
        output = self._format_content(results, include_line_numbers)
    # Truncate if needed
    if len(output) > MAX_OUTPUT_CHARS:
        output = (
            output[:MAX_OUTPUT_CHARS]
            + "\n\n... Output truncated. Try a narrower search pattern or glob filter."
        )
    return output
@staticmethod
def _expand_brace_pattern(pattern: str) -> list[str]:
    """Expand a single-level brace group into separate glob patterns.

    ``*.{py,txt}`` becomes ``['*.py', '*.txt']``. Only one non-nested
    ``{a,b,...}`` group is expanded; whitespace around alternatives is
    stripped. Nested braces are *not* supported and the pattern is
    returned as-is.

    Args:
        pattern: Glob pattern that may contain ``{a,b,...}`` syntax.

    Returns:
        List of expanded patterns (or the original if no braces found).
    """
    brace = re.search(r"\{([^{}]+)\}", pattern)
    if brace is None:
        return [pattern]
    head = pattern[: brace.start()]
    tail = pattern[brace.end() :]
    return [head + alt.strip() + tail for alt in brace.group(1).split(",")]
def _collect_files(self, search_path: Path, glob_pattern: str | None) -> list[Path]:
    """Collect the set of files to search under *search_path*.

    Sensitive files (e.g. ``.env``, ``.netrc``, key material) are
    automatically excluded even when searched by explicit path so that
    credentials cannot leak into tool output. Directories listed in
    SKIP_DIRS are also pruned from the results.

    Args:
        search_path: File or directory to search.
        glob_pattern: Optional glob pattern to filter files.

    Returns:
        Sorted list of file paths to search (at most MAX_FILES entries).
    """
    if search_path.is_file():
        # Single-file search: still honor the sensitive-file blocklist.
        return [] if self._is_sensitive_file(search_path) else [search_path]

    globs = self._expand_brace_pattern(glob_pattern) if glob_pattern else ["*"]
    visited: set[Path] = set()
    collected: list[Path] = []
    candidates = chain.from_iterable(search_path.rglob(g) for g in globs)
    for candidate in candidates:
        # Expanded brace patterns may yield the same path twice.
        if not candidate.is_file() or candidate in visited:
            continue
        visited.add(candidate)
        # Skip anything inside hidden/build directories.
        rel_parts = candidate.relative_to(search_path).parts
        if any(part in SKIP_DIRS for part in rel_parts):
            continue
        if self._is_sensitive_file(candidate):
            continue
        collected.append(candidate)
        if len(collected) >= MAX_FILES:
            break
    return sorted(collected)
@staticmethod
def _safe_search(
    compiled_pattern: re.Pattern[str], line: str
) -> re.Match[str] | None:
    """Run a regex search with a per-line timeout to mitigate ReDoS.

    On platforms that support SIGALRM (Unix), a timeout is enforced.
    On Windows, the search runs without a timeout but is still bounded
    by MAX_LINE_LENGTH truncation applied earlier in the pipeline.

    Args:
        compiled_pattern: Compiled regex pattern.
        line: The text line to search.

    Returns:
        Match object if found, None otherwise (including on timeout).
    """
    if sys.platform == "win32":
        return compiled_pattern.search(line)

    def _timeout_handler(signum: int, frame: object) -> None:
        raise TimeoutError

    try:
        old_handler = signal.signal(signal.SIGALRM, _timeout_handler)
    except ValueError:
        # signal.signal() raises ValueError outside the main thread of the
        # main interpreter. Tools frequently run in worker threads, so fall
        # back to an unguarded search instead of crashing the whole search.
        return compiled_pattern.search(line)
    signal.alarm(REGEX_MATCH_TIMEOUT_SECONDS)
    try:
        return compiled_pattern.search(line)
    except TimeoutError:
        return None
    finally:
        # Always cancel the pending alarm and restore the previous handler.
        signal.alarm(0)
        signal.signal(signal.SIGALRM, old_handler)
@staticmethod
def _is_sensitive_file(file_path: Path) -> bool:
    """Check whether a file is likely to contain secrets or credentials.

    The check is deliberately conservative — it matches exact file names
    (e.g. ``.env``, ``.netrc``), any name beginning with ``.env``
    (``.env.local``, ``.env.production``, ...), common key/certificate
    extensions, and well-known sensitive path suffixes such as
    ``.aws/credentials`` or ``.ssh/id_rsa``.

    Args:
        file_path: Path to the file.

    Returns:
        True if the file should be skipped.
    """
    name = file_path.name
    # Exact-name match, or any .env variant (.env.backup, .env.staging.old, ...)
    if name in SENSITIVE_FILE_NAMES or name.startswith(".env"):
        return True
    # Extension-based match for key/cert material (endswith takes a tuple).
    if name.endswith(SENSITIVE_FILE_PATTERNS):
        return True
    # Match every trailing slice of the path against known sensitive
    # dir/file combinations (e.g. ".aws/credentials", ".ssh/id_rsa").
    segments = file_path.parts
    return any(
        "/".join(segments[i:]) in SENSITIVE_FILE_NAMES
        for i in range(len(segments))
    )
def _is_binary_file(self, file_path: Path) -> bool:
    """Check if a file is binary by looking for null bytes.

    Only the first BINARY_CHECK_SIZE bytes are examined. Unreadable
    files are treated as binary so they are silently skipped.

    Args:
        file_path: Path to the file.

    Returns:
        True if the file appears to be binary (or cannot be read).
    """
    try:
        with open(file_path, "rb") as f:
            chunk = f.read(BINARY_CHECK_SIZE)
        return b"\x00" in chunk
    except OSError:
        # PermissionError is a subclass of OSError, so a single clause
        # covers all read failures.
        return True
def _search_file(
    self,
    file_path: Path,
    compiled_pattern: re.Pattern[str],
    context_lines: int,
) -> FileSearchResult | None:
    """Search a single file for matches.

    Sensitive, binary, oversized, and unreadable files are skipped by
    returning None, as are files with no matching lines.

    Args:
        file_path: Path to the file.
        compiled_pattern: Compiled regex pattern.
        context_lines: Number of context lines around matches.

    Returns:
        FileSearchResult if matches found, None otherwise.
    """
    if self._is_sensitive_file(file_path):
        return None
    if self._is_binary_file(file_path):
        return None
    # Skip files that are too large to safely read into memory
    try:
        file_size = file_path.stat().st_size
    except OSError:
        return None
    if file_size > self.max_file_size_bytes:
        return None
    try:
        with open(file_path, encoding="utf-8", errors="replace") as f:
            lines = f.readlines()
    except OSError:  # covers PermissionError, which subclasses OSError
        return None
    # Find matching line numbers (0-indexed), capped per file
    match_line_nums: list[int] = []
    for i, line in enumerate(lines):
        if self._safe_search(compiled_pattern, line):
            match_line_nums.append(i)
            if len(match_line_nums) >= MAX_MATCHES_PER_FILE:
                break
    if not match_line_nums:
        return None
    # O(1) membership for the match/context flag below; the previous list
    # lookup made group building O(matches * lines).
    match_set = set(match_line_nums)
    # Build groups of contiguous match blocks with context
    groups: list[list[MatchLine]] = []
    current_group: list[MatchLine] = []
    prev_end = -1
    for match_idx in match_line_nums:
        start = max(0, match_idx - context_lines)
        end = min(len(lines), match_idx + context_lines + 1)
        # If this block doesn't overlap with the previous, start a new group
        if start > prev_end and current_group:
            groups.append(current_group)
            current_group = []
        # Start at prev_end when overlapping so no line is emitted twice
        for i in range(max(start, prev_end), end):
            text = lines[i].rstrip("\n\r")
            if len(text) > MAX_LINE_LENGTH:
                text = text[:MAX_LINE_LENGTH] + "..."
            current_group.append(
                MatchLine(
                    line_number=i + 1,  # 1-indexed
                    text=text,
                    is_match=(i in match_set),
                )
            )
        prev_end = end
    if current_group:
        groups.append(current_group)
    return FileSearchResult(
        file_path=file_path,
        matches=groups,
        match_count=len(match_line_nums),
    )
def _format_content(
    self,
    results: list[FileSearchResult],
    include_line_numbers: bool,
) -> str:
    """Format results showing matching content.

    Each file's path is followed by its match groups; groups within a
    file are separated by ``--`` and files by a blank line.

    Args:
        results: List of file search results.
        include_line_numbers: Whether to include line numbers.

    Returns:
        Formatted string with file paths and matching lines.
    """
    chunks: list[str] = []
    for file_result in results:
        chunks.append(str(file_result.file_path))
        for group_idx, group in enumerate(file_result.matches):
            if group_idx:
                chunks.append("--")
            if include_line_numbers:
                chunks.extend(f"{ml.line_number}: {ml.text}" for ml in group)
            else:
                chunks.extend(ml.text for ml in group)
        chunks.append("")  # blank line between files
    return "\n".join(chunks).rstrip()
def _format_files_with_matches(self, results: list[FileSearchResult]) -> str:
    """Format results showing only file paths.

    Args:
        results: List of file search results.

    Returns:
        One file path per line.
    """
    paths = [str(result.file_path) for result in results]
    return "\n".join(paths)
def _format_count(self, results: list[FileSearchResult]) -> str:
    """Format results showing match counts per file.

    Args:
        results: List of file search results.

    Returns:
        Filepath and count per line ("path: N").
    """
    rows = [f"{result.file_path}: {result.match_count}" for result in results]
    return "\n".join(rows)

View File

@@ -0,0 +1,450 @@
"""Unit tests for GrepTool."""
from __future__ import annotations
from pathlib import Path
import pytest
from pydantic import ValidationError
from crewai_tools import GrepTool
from crewai_tools.tools.grep_tool.grep_tool import (
MAX_CONTEXT_LINES,
MAX_REGEX_LENGTH,
GrepToolSchema,
)
@pytest.fixture
def sample_dir(tmp_path: Path) -> Path:
    """Create a temp directory with sample files for testing.

    Layout: two Python sources under src/, a markdown doc, a binary file
    (contains null bytes), an empty file, and a .git directory that the
    tool must skip.
    """
    # src/main.py
    src = tmp_path / "src"
    src.mkdir()
    (src / "main.py").write_text(
        "def hello():\n"
        "    print('Hello, world!')\n"
        "\n"
        "def goodbye():\n"
        "    print('Goodbye, world!')\n"
        "\n"
        "class MyClass:\n"
        "    pass\n"
    )
    # src/utils.py
    (src / "utils.py").write_text(
        "import os\n"
        "\n"
        "def helper():\n"
        "    return os.getcwd()\n"
        "\n"
        "CONSTANT = 42\n"
    )
    # docs/readme.md
    docs = tmp_path / "docs"
    docs.mkdir()
    (docs / "readme.md").write_text(
        "# Project\n"
        "\n"
        "This is a sample project.\n"
        "It has multiple files.\n"
    )
    # data/binary.bin — null bytes trigger the binary-file skip
    data = tmp_path / "data"
    data.mkdir()
    (data / "binary.bin").write_bytes(b"\x00\x01\x02\x03\x04binary content")
    # empty.txt
    (tmp_path / "empty.txt").write_text("")
    # .git/config (should be skipped)
    git_dir = tmp_path / ".git"
    git_dir.mkdir()
    (git_dir / "config").write_text("[core]\n    repositoryformatversion = 0\n")
    return tmp_path
class TestGrepTool:
    """Tests for GrepTool core behavior (matching, output modes, filters)."""

    def setup_method(self) -> None:
        """Set up test fixtures.

        We use allow_unrestricted_paths=True so that tests using pytest's
        tmp_path (which lives outside the working directory) are not rejected
        by the path-restriction guard.
        """
        self.tool = GrepTool(allow_unrestricted_paths=True)

    def test_tool_metadata(self) -> None:
        """Test tool has correct name and description."""
        assert self.tool.name == "Search file contents"
        assert "search" in self.tool.description.lower() or "Search" in self.tool.description

    def test_args_schema(self) -> None:
        """Test that args_schema has correct fields and defaults."""
        schema = self.tool.args_schema
        fields = schema.model_fields
        # Only `pattern` is required; everything else has a default.
        assert "pattern" in fields
        assert fields["pattern"].is_required()
        assert "path" in fields
        assert not fields["path"].is_required()
        assert "glob_pattern" in fields
        assert not fields["glob_pattern"].is_required()
        assert "output_mode" in fields
        assert not fields["output_mode"].is_required()
        assert "case_insensitive" in fields
        assert not fields["case_insensitive"].is_required()
        assert "context_lines" in fields
        assert not fields["context_lines"].is_required()
        assert "include_line_numbers" in fields
        assert not fields["include_line_numbers"].is_required()

    def test_basic_pattern_match(self, sample_dir: Path) -> None:
        """Test simple string pattern found in output."""
        result = self.tool._run(pattern="Hello", path=str(sample_dir))
        assert "Hello" in result

    def test_regex_pattern(self, sample_dir: Path) -> None:
        """Test regex pattern matches function definitions."""
        result = self.tool._run(pattern=r"def\s+\w+", path=str(sample_dir))
        assert "def hello" in result
        assert "def goodbye" in result
        assert "def helper" in result

    def test_case_sensitive_default(self, sample_dir: Path) -> None:
        """Test that search is case-sensitive by default."""
        result = self.tool._run(pattern="hello", path=str(sample_dir))
        # "hello" (lowercase) appears in "def hello():" but not in "Hello, world!"
        assert "hello" in result
        # Verify it found the function definition line
        assert "def hello" in result

    def test_case_insensitive(self, sample_dir: Path) -> None:
        """Test case-insensitive matching."""
        result = self.tool._run(
            pattern="hello", path=str(sample_dir), case_insensitive=True
        )
        # Should match both "def hello():" and "Hello, world!"
        assert "hello" in result.lower()
        assert "Hello" in result

    def test_output_mode_content(self, sample_dir: Path) -> None:
        """Test content output mode shows file paths, line numbers, and text."""
        result = self.tool._run(
            pattern="CONSTANT", path=str(sample_dir), output_mode="content"
        )
        assert "utils.py" in result
        assert "CONSTANT" in result
        # Should have line numbers by default
        assert ": " in result

    def test_output_mode_files_with_matches(self, sample_dir: Path) -> None:
        """Test files_with_matches output mode shows only file paths."""
        result = self.tool._run(
            pattern="def", path=str(sample_dir), output_mode="files_with_matches"
        )
        assert "main.py" in result
        assert "utils.py" in result
        # Should not contain line content
        assert "print" not in result

    def test_output_mode_count(self, sample_dir: Path) -> None:
        """Test count output mode shows filepath: N format."""
        result = self.tool._run(
            pattern="def", path=str(sample_dir), output_mode="count"
        )
        # main.py has 2 def lines, utils.py has 1
        assert "main.py: 2" in result
        assert "utils.py: 1" in result

    def test_context_lines(self, sample_dir: Path) -> None:
        """Test surrounding context lines are included."""
        result = self.tool._run(
            pattern="CONSTANT", path=str(sample_dir), context_lines=2
        )
        # Two lines before CONSTANT = 42 is "    return os.getcwd()"
        assert "return os.getcwd()" in result
        assert "CONSTANT" in result

    def test_line_numbers_disabled(self, sample_dir: Path) -> None:
        """Test output without line number prefixes."""
        result = self.tool._run(
            pattern="CONSTANT",
            path=str(sample_dir),
            include_line_numbers=False,
        )
        assert "CONSTANT = 42" in result
        # Verify no line number prefix (e.g., "6: ")
        for line in result.strip().split("\n"):
            if "CONSTANT" in line:
                assert not line[0].isdigit() or ": " not in line

    def test_glob_pattern_filtering(self, sample_dir: Path) -> None:
        """Test glob pattern filters to specific file types."""
        result = self.tool._run(
            pattern="project",
            path=str(sample_dir),
            glob_pattern="*.py",
            case_insensitive=True,
        )
        # "project" appears in readme.md but not in .py files
        assert "No matches found" in result

    def test_search_single_file(self, sample_dir: Path) -> None:
        """Test searching a single file by path."""
        file_path = str(sample_dir / "src" / "main.py")
        result = self.tool._run(pattern="def", path=file_path)
        assert "def hello" in result
        assert "def goodbye" in result
        # Should not include results from other files
        assert "helper" not in result

    def test_path_not_found(self) -> None:
        """Test error message when a relative path doesn't exist."""
        result = self.tool._run(pattern="test", path="totally_nonexistent_subdir")
        assert "Error" in result
        assert "does not exist" in result

    def test_invalid_regex(self, sample_dir: Path) -> None:
        """Test error message for invalid regex patterns."""
        result = self.tool._run(pattern="[invalid", path=str(sample_dir))
        assert "Error" in result
        assert "Invalid regex" in result

    def test_binary_files_skipped(self, sample_dir: Path) -> None:
        """Test binary files are not included in results."""
        result = self.tool._run(pattern="binary", path=str(sample_dir))
        # binary.bin has null bytes so it should be skipped
        assert "binary.bin" not in result

    def test_no_matches_found(self, sample_dir: Path) -> None:
        """Test message when no matches are found."""
        result = self.tool._run(
            pattern="zzz_nonexistent_pattern_zzz", path=str(sample_dir)
        )
        assert "No matches found" in result

    def test_hidden_dirs_skipped(self, sample_dir: Path) -> None:
        """Test that .git/ directory contents are not searched."""
        result = self.tool._run(pattern="repositoryformatversion", path=str(sample_dir))
        assert "No matches found" in result

    def test_empty_file(self, sample_dir: Path) -> None:
        """Test searching an empty file doesn't crash."""
        result = self.tool._run(
            pattern="anything", path=str(sample_dir / "empty.txt")
        )
        assert "No matches found" in result

    def test_run_with_kwargs(self, sample_dir: Path) -> None:
        """Test _run ignores extra kwargs."""
        result = self.tool._run(
            pattern="Hello", path=str(sample_dir), extra_arg="ignored"
        )
        assert "Hello" in result
class TestPathRestriction:
    """Tests for path traversal prevention and allow_unrestricted_paths."""

    def test_absolute_path_outside_cwd_blocked(self, tmp_path: Path) -> None:
        """An absolute path outside cwd is rejected by default."""
        tool = GrepTool()
        # tmp_path is almost certainly not under os.getcwd()
        result = tool._run(pattern="anything", path=str(tmp_path))
        assert "Error" in result
        assert "outside the working directory" in result

    def test_relative_traversal_blocked(self, sample_dir: Path) -> None:
        """A relative path with ../ that escapes cwd is rejected."""
        tool = GrepTool()
        result = tool._run(pattern="anything", path="../../etc")
        assert "Error" in result
        assert "outside the working directory" in result

    def test_relative_path_within_cwd_allowed(self) -> None:
        """A relative path that stays inside cwd works fine."""
        tool = GrepTool()
        # "." is always within cwd
        result = tool._run(pattern="zzz_will_not_match_anything_zzz", path=".")
        # Should not get a traversal error — either matches or "No matches found"
        assert "outside the working directory" not in result

    def test_allow_unrestricted_paths_bypasses_check(self, tmp_path: Path) -> None:
        """With allow_unrestricted_paths=True, absolute paths outside cwd are allowed."""
        # Write a searchable file in tmp_path
        (tmp_path / "hello.txt").write_text("unrestricted search target\n")
        tool = GrepTool(allow_unrestricted_paths=True)
        result = tool._run(pattern="unrestricted", path=str(tmp_path))
        assert "unrestricted search target" in result

    def test_allow_unrestricted_defaults_false(self) -> None:
        """The flag defaults to False."""
        tool = GrepTool()
        assert tool.allow_unrestricted_paths is False

    def test_error_message_includes_hint(self, tmp_path: Path) -> None:
        """The traversal error tells the user how to opt in."""
        tool = GrepTool()
        result = tool._run(pattern="x", path=str(tmp_path))
        assert "GrepTool(allow_unrestricted_paths=True)" in result
class TestReDoSGuards:
    """Tests for regex denial-of-service mitigations."""

    def test_pattern_length_rejected(self, sample_dir: Path) -> None:
        """Patterns exceeding MAX_REGEX_LENGTH are rejected before compilation."""
        tool = GrepTool(allow_unrestricted_paths=True)
        long_pattern = "a" * (MAX_REGEX_LENGTH + 1)
        result = tool._run(pattern=long_pattern, path=str(sample_dir))
        assert "Error" in result
        assert "Pattern too long" in result

    def test_pattern_at_max_length_accepted(self, sample_dir: Path) -> None:
        """A pattern exactly at MAX_REGEX_LENGTH is allowed (boundary check)."""
        tool = GrepTool(allow_unrestricted_paths=True)
        exact_pattern = "a" * MAX_REGEX_LENGTH
        result = tool._run(pattern=exact_pattern, path=str(sample_dir))
        # Should not get a length error — either matches or "No matches found"
        assert "Pattern too long" not in result

    def test_safe_search_returns_match(self) -> None:
        """_safe_search returns a match object for a normal pattern."""
        # Plain import instead of the obscure __import__("re") idiom;
        # local because the module header does not import re.
        import re

        compiled = re.compile(r"hello")
        match = GrepTool._safe_search(compiled, "say hello world")
        assert match is not None
        assert match.group() == "hello"

    def test_safe_search_returns_none_on_no_match(self) -> None:
        """_safe_search returns None when the pattern doesn't match."""
        import re

        compiled = re.compile(r"zzz")
        match = GrepTool._safe_search(compiled, "hello world")
        assert match is None
class TestBraceExpansion:
    """Tests for glob brace expansion ({a,b} syntax)."""

    def test_expand_simple_brace(self) -> None:
        """*.{py,txt} expands to ['*.py', '*.txt']."""
        result = GrepTool._expand_brace_pattern("*.{py,txt}")
        assert result == ["*.py", "*.txt"]

    def test_expand_three_alternatives(self) -> None:
        """*.{py,txt,md} expands to three patterns."""
        result = GrepTool._expand_brace_pattern("*.{py,txt,md}")
        assert result == ["*.py", "*.txt", "*.md"]

    def test_expand_no_braces_passthrough(self) -> None:
        """A pattern without braces is returned as a single-element list."""
        result = GrepTool._expand_brace_pattern("*.py")
        assert result == ["*.py"]

    def test_expand_strips_whitespace(self) -> None:
        """Whitespace around alternatives inside braces is stripped."""
        result = GrepTool._expand_brace_pattern("*.{ py , txt }")
        assert result == ["*.py", "*.txt"]

    def test_expand_prefix_and_suffix(self) -> None:
        """Prefix and suffix around the braces are preserved."""
        result = GrepTool._expand_brace_pattern("src/*.{py,pyi}.bak")
        assert result == ["src/*.py.bak", "src/*.pyi.bak"]

    def test_brace_glob_end_to_end(self, tmp_path: Path) -> None:
        """Brace expansion works end-to-end with _collect_files."""
        (tmp_path / "a.py").write_text("match_me\n")
        (tmp_path / "b.txt").write_text("match_me\n")
        (tmp_path / "c.md").write_text("match_me\n")
        tool = GrepTool(allow_unrestricted_paths=True)
        result = tool._run(
            pattern="match_me",
            path=str(tmp_path),
            glob_pattern="*.{py,txt}",
        )
        assert "a.py" in result
        assert "b.txt" in result
        # .md should NOT be included
        assert "c.md" not in result

    def test_brace_glob_no_duplicates(self, tmp_path: Path) -> None:
        """Files are not reported twice when they match multiple expanded patterns."""
        (tmp_path / "x.py").write_text("unique_content\n")
        tool = GrepTool(allow_unrestricted_paths=True)
        result = tool._run(
            pattern="unique_content",
            path=str(tmp_path),
            glob_pattern="*.{py,py}",
            output_mode="count",
        )
        # Should appear exactly once
        assert result.count("x.py") == 1
class TestSensitiveFileProtection:
    """Tests for sensitive file exclusion (secrets leakage prevention)."""

    @pytest.mark.parametrize(
        "name",
        [".env", ".env.local", ".netrc", ".npmrc", "secrets.json", "server.pem"],
    )
    def test_sensitive_files_excluded(self, tmp_path: Path, name: str) -> None:
        """Sensitive files are skipped even if they contain matches."""
        (tmp_path / name).write_text("MATCH_ME\n")
        tool = GrepTool(allow_unrestricted_paths=True)
        result = tool._run(pattern="MATCH_ME", path=str(tmp_path))
        assert "No matches found" in result

    def test_sensitive_file_blocked_by_direct_path(self, tmp_path: Path) -> None:
        """A .env passed as the explicit path argument is still blocked."""
        env = tmp_path / ".env"
        env.write_text("SECRET=abc\n")
        tool = GrepTool(allow_unrestricted_paths=True)
        result = tool._run(pattern="SECRET", path=str(env))
        assert "No matches found" in result
class TestFileSizeLimit:
    """Tests for max_file_size_bytes guard."""

    def test_large_file_skipped(self, tmp_path: Path) -> None:
        """Files over max_file_size_bytes are skipped."""
        # 700 bytes of content vs. a 50-byte limit
        (tmp_path / "big.txt").write_text("needle\n" * 100)
        tool = GrepTool(allow_unrestricted_paths=True, max_file_size_bytes=50)
        result = tool._run(pattern="needle", path=str(tmp_path))
        assert "No matches found" in result

    def test_large_file_searched_with_raised_limit(self, tmp_path: Path) -> None:
        """Raising the limit lets the same file be searched."""
        (tmp_path / "big.txt").write_text("needle\n" * 100)
        tool = GrepTool(allow_unrestricted_paths=True, max_file_size_bytes=50_000)
        result = tool._run(pattern="needle", path=str(tmp_path))
        assert "needle" in result
class TestContextLinesUpperBound:
    """Pydantic validation bounds on the context_lines schema field."""

    def test_negative_rejected(self) -> None:
        """A value below zero fails schema validation."""
        with pytest.raises(ValidationError):
            GrepToolSchema(pattern="x", context_lines=-1)

    def test_over_max_rejected(self) -> None:
        """A value one past MAX_CONTEXT_LINES fails schema validation."""
        too_many = MAX_CONTEXT_LINES + 1
        with pytest.raises(ValidationError):
            GrepToolSchema(pattern="x", context_lines=too_many)

View File

@@ -5664,10 +5664,6 @@
"title": "Bucket Name",
"type": "string"
},
"cluster": {
"description": "An instance of the Couchbase Cluster connected to the desired Couchbase server.",
"title": "Cluster"
},
"collection_name": {
"description": "The name of the Couchbase collection to search",
"title": "Collection Name",
@@ -5716,7 +5712,6 @@
}
},
"required": [
"cluster",
"collection_name",
"scope_name",
"bucket_name",
@@ -10155,6 +10150,141 @@
"type": "object"
}
},
{
"description": "A tool that searches file contents on disk using regex patterns. Recursively searches files in a directory for matching lines. Returns matching content with line numbers, file paths only, or match counts.",
"env_vars": [],
"humanized_name": "Search file contents",
"init_params_schema": {
"$defs": {
"EnvVar": {
"properties": {
"default": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Default"
},
"description": {
"title": "Description",
"type": "string"
},
"name": {
"title": "Name",
"type": "string"
},
"required": {
"default": true,
"title": "Required",
"type": "boolean"
}
},
"required": [
"name",
"description"
],
"title": "EnvVar",
"type": "object"
}
},
"description": "Tool for searching file contents on disk using regex patterns.\n\nRecursively searches files in a directory for lines matching a regex pattern.\nSupports glob filtering, context lines, and multiple output modes.\n\nExample:\n >>> tool = GrepTool()\n >>> result = tool.run(pattern=\"def.*main\", path=\"src\")\n >>> result = tool.run(\n ... pattern=\"TODO\",\n ... glob_pattern=\"*.py\",\n ... context_lines=2,\n ... )\n\n To search any path on the filesystem (opt-in):\n >>> tool = GrepTool(allow_unrestricted_paths=True)\n >>> result = tool.run(pattern=\"error\", path=\"/var/log/app\")",
"properties": {
"allow_unrestricted_paths": {
"default": false,
"description": "When False (default), searches are restricted to the current working directory. Set to True to allow searching any path on the filesystem.",
"title": "Allow Unrestricted Paths",
"type": "boolean"
},
"max_file_size_bytes": {
"default": 10485760,
"description": "Maximum file size in bytes to search. Files larger than this are skipped. Defaults to 10 MB.",
"title": "Max File Size Bytes",
"type": "integer"
}
},
"title": "GrepTool",
"type": "object"
},
"name": "GrepTool",
"package_dependencies": [],
"run_params_schema": {
"description": "Schema for grep tool arguments.",
"properties": {
"case_insensitive": {
"default": false,
"description": "Whether to perform case-insensitive matching",
"title": "Case Insensitive",
"type": "boolean"
},
"context_lines": {
"default": 0,
"description": "Number of lines to show before and after each match (0-10)",
"maximum": 10,
"minimum": 0,
"title": "Context Lines",
"type": "integer"
},
"glob_pattern": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Glob pattern to filter files (e.g. '*.py'). Supports brace expansion (e.g. '*.{ts,tsx}').",
"title": "Glob Pattern"
},
"include_line_numbers": {
"default": true,
"description": "Whether to prefix matching lines with line numbers",
"title": "Include Line Numbers",
"type": "boolean"
},
"output_mode": {
"default": "content",
"description": "Output mode: 'content' shows matching lines, 'files_with_matches' shows only file paths, 'count' shows match counts per file",
"enum": [
"content",
"files_with_matches",
"count"
],
"title": "Output Mode",
"type": "string"
},
"path": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "File or directory to search in. Defaults to current working directory.",
"title": "Path"
},
"pattern": {
"description": "Regex pattern to search for in file contents",
"title": "Pattern",
"type": "string"
}
},
"required": [
"pattern"
],
"title": "GrepToolSchema",
"type": "object"
}
},
{
"description": "Scrape or crawl a website using Hyperbrowser and return the contents in properly formatted markdown or html",
"env_vars": [
@@ -14460,13 +14590,9 @@
"properties": {
"config": {
"$ref": "#/$defs/OxylabsAmazonProductScraperConfig"
},
"oxylabs_api": {
"title": "Oxylabs Api"
}
},
"required": [
"oxylabs_api",
"config"
],
"title": "OxylabsAmazonProductScraperTool",
@@ -14689,13 +14815,9 @@
"properties": {
"config": {
"$ref": "#/$defs/OxylabsAmazonSearchScraperConfig"
},
"oxylabs_api": {
"title": "Oxylabs Api"
}
},
"required": [
"oxylabs_api",
"config"
],
"title": "OxylabsAmazonSearchScraperTool",
@@ -14931,13 +15053,9 @@
"properties": {
"config": {
"$ref": "#/$defs/OxylabsGoogleSearchScraperConfig"
},
"oxylabs_api": {
"title": "Oxylabs Api"
}
},
"required": [
"oxylabs_api",
"config"
],
"title": "OxylabsGoogleSearchScraperTool",
@@ -15121,13 +15239,9 @@
"properties": {
"config": {
"$ref": "#/$defs/OxylabsUniversalScraperConfig"
},
"oxylabs_api": {
"title": "Oxylabs Api"
}
},
"required": [
"oxylabs_api",
"config"
],
"title": "OxylabsUniversalScraperTool",
@@ -23229,26 +23343,6 @@
"description": "The Tavily API key. If not provided, it will be loaded from the environment variable TAVILY_API_KEY.",
"title": "Api Key"
},
"async_client": {
"anyOf": [
{},
{
"type": "null"
}
],
"default": null,
"title": "Async Client"
},
"client": {
"anyOf": [
{},
{
"type": "null"
}
],
"default": null,
"title": "Client"
},
"extract_depth": {
"default": "basic",
"description": "The depth of extraction. 'basic' for basic extraction, 'advanced' for advanced extraction.",
@@ -23384,26 +23478,6 @@
"description": "The Tavily API key. If not provided, it will be loaded from the environment variable TAVILY_API_KEY.",
"title": "Api Key"
},
"async_client": {
"anyOf": [
{},
{
"type": "null"
}
],
"default": null,
"title": "Async Client"
},
"client": {
"anyOf": [
{},
{
"type": "null"
}
],
"default": null,
"title": "Client"
},
"days": {
"default": 7,
"description": "The number of days to search back.",