Squashed 'packages/tools/' content from commit 78317b9c

git-subtree-dir: packages/tools git-subtree-split: 78317b9c127f18bd040c1d77e3c0840cdc9a5b38
2026-01-19 21:08:13 +00:00 · 2025-09-12 21:58:02 -04:00
commit e16606672a
303 changed files with 49010 additions and 0 deletions
--- a/crewai_tools/tools/github_search_tool/README.md
+++ b/crewai_tools/tools/github_search_tool/README.md
@@ -0,0 +1,67 @@
+# GithubSearchTool
+
+## Description
+The GithubSearchTool is a Retrieval Augmented Generation (RAG) tool specifically designed for conducting semantic searches within GitHub repositories. Utilizing advanced semantic search capabilities, it sifts through code, pull requests, issues, and repositories, making it an essential tool for developers, researchers, or anyone in need of precise information from GitHub.
+
+## Installation
+To use the GithubSearchTool, first ensure the crewai_tools package is installed in your Python environment:
+
+```shell
+pip install 'crewai[tools]'
+```
+
+This command installs the necessary package to run the GithubSearchTool along with any other tools included in the crewai_tools package.
+
+## Example
+Here’s how you can use the GithubSearchTool to perform semantic searches within a GitHub repository:
+```python
+from crewai_tools import GithubSearchTool
+
+# Initialize the tool for semantic searches within a specific GitHub repository
+tool = GithubSearchTool(
+    gh_token='...',
+    github_repo='https://github.com/example/repo',
+    content_types=['code', 'issue'] # Options: code, repo, pr, issue
+)
+
+# OR
+
+# Initialize the tool without a specific GitHub repository, so the agent can search any repository it learns about during its execution
+tool = GithubSearchTool(
+    gh_token='...',
+    content_types=['code', 'issue'] # Options: code, repo, pr, issue
+)
+```
+
+## Arguments
+- `gh_token` : The GitHub token used to authenticate the search. This is a mandatory field and allows the tool to access the GitHub API for conducting searches.
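+- `github_repo` : The URL of the GitHub repository where the search will be conducted. This field is optional at initialization: when it is provided together with `content_types`, the repository is added up front and the agent only has to supply a search query; when it is omitted, the agent must specify the target repository each time it runs the tool.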
+- `content_types` : Specifies the types of content to include in your search, as a list of content types from the following options: `code` for searching within the code, `repo` for searching within the repository's general information, `pr` for searching within pull requests, and `issue` for searching within issues. This field is optional at initialization; when the tool is not bound to a repository, the agent must supply the content types together with the repository when it runs the tool. It allows tailoring the search to specific content types within the GitHub repository.
+
+## Custom model and embeddings
+
+By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
+
+```python
+tool = GithubSearchTool(
+    config=dict(
+        llm=dict(
+            provider="ollama", # or google, openai, anthropic, llama2, ...
+            config=dict(
+                model="llama2",
+                # temperature=0.5,
+                # top_p=1,
+                # stream=True,
+            ),
+        ),
+        embedder=dict(
+            provider="google",
+            config=dict(
+                model="models/embedding-001",
+                task_type="retrieval_document",
+                # title="Embeddings",
+            ),
+        ),
+    )
+)
+```
--- a/crewai_tools/tools/github_search_tool/github_search_tool.py
+++ b/crewai_tools/tools/github_search_tool/github_search_tool.py
@@ -0,0 +1,88 @@
+from typing import List, Optional, Type, Any
+
+try:
+    from embedchain.loaders.github import GithubLoader
+    EMBEDCHAIN_AVAILABLE = True
+except ImportError:
+    EMBEDCHAIN_AVAILABLE = False
+
+from pydantic import BaseModel, Field, PrivateAttr
+
+from ..rag.rag_tool import RagTool
+
+
+class FixedGithubSearchToolSchema(BaseModel):
+    """Input for GithubSearchTool."""
+
+    # Argument schema that GithubSearchTool switches to once a repository has
+    # been added in its constructor: only the search query is left for the
+    # caller to provide.
+
+    # Required (``...`` means no default).
+    search_query: str = Field(
+        ...,
+        description="Mandatory search query you want to use to search the github repo's content",
+    )
+
+
+class GithubSearchToolSchema(FixedGithubSearchToolSchema):
+    """Input for GithubSearchTool."""
+
+    # Default argument schema of GithubSearchTool. On top of the inherited
+    # ``search_query`` it requires the caller to name the repository and the
+    # content types on every call.
+
+    # Required; passed to GithubSearchTool._run as ``github_repo``.
+    github_repo: str = Field(..., description="Mandatory github you want to search")
+    # Required; the options listed in the description are
+    # code, repo, pr and issue.
+    content_types: List[str] = Field(
+        ...,
+        description="Mandatory content types you want to be included search, options: [code, repo, pr, issue]",
+    )
+
+
+class GithubSearchTool(RagTool):
+    name: str = "Search a github repo's content"
+    description: str = (
+        "A tool that can be used to semantic search a query from a github repo's content. This is not the GitHub API, but instead a tool that can provide semantic search capabilities."
+    )
+    summarize: bool = False
+    gh_token: str
+    args_schema: Type[BaseModel] = GithubSearchToolSchema
+    content_types: List[str] = Field(
+        default_factory=lambda: ["code", "repo", "pr", "issue"],
+        description="Content types you want to be included search, options: [code, repo, pr, issue]",
+    )
+    _loader: Any | None = PrivateAttr(default=None)
+
+    def __init__(
+        self,
+        github_repo: Optional[str] = None,
+        content_types: Optional[List[str]] = None,
+        **kwargs,
+    ):
+        if not EMBEDCHAIN_AVAILABLE:
+            raise ImportError("embedchain is not installed. Please install it with `pip install crewai-tools[embedchain]`")
+        super().__init__(**kwargs)
+        self._loader = GithubLoader(config={"token": self.gh_token})
+
+        if github_repo and content_types:
+            self.add(repo=github_repo, content_types=content_types)
+            self.description = f"A tool that can be used to semantic search a query the {github_repo} github repo's content. This is not the GitHub API, but instead a tool that can provide semantic search capabilities."
+            self.args_schema = FixedGithubSearchToolSchema
+            self._generate_description()
+
+    def add(
+        self,
+        repo: str,
+        content_types: Optional[List[str]] = None,
+    ) -> None:
+        content_types = content_types or self.content_types
+
+        super().add(
+            f"repo:{repo} type:{','.join(content_types)}",
+            data_type="github",
+            loader=self._loader,
+        )
+
+    def _run(
+        self,
+        search_query: str,
+        github_repo: Optional[str] = None,
+        content_types: Optional[List[str]] = None,
+    ) -> str:
+        if github_repo:
+            self.add(
+                repo=github_repo,
+                content_types=content_types,
+            )
+        return super()._run(query=search_query)