Merge branch 'main' into main

This commit is contained in:
Naman Garg
2024-07-08 15:12:25 -07:00
committed by GitHub
15 changed files with 597 additions and 0 deletions

View File

@@ -1,12 +1,17 @@
from .tools import (
BrowserbaseLoadTool,
CodeDocsSearchTool,
CodeInterpreterTool,
ComposioTool,
CSVSearchTool,
DirectoryReadTool,
DirectorySearchTool,
DOCXSearchTool,
EXASearchTool,
FileReadTool,
FirecrawlCrawlWebsiteTool,
FirecrawlScrapeWebsiteTool,
FirecrawlSearchTool,
GithubSearchTool,
JSONSearchTool,
LlamaIndexTool,

View File

@@ -1,11 +1,16 @@
from .browserbase_load_tool.browserbase_load_tool import BrowserbaseLoadTool
from .code_docs_search_tool.code_docs_search_tool import CodeDocsSearchTool
from .code_interpreter_tool.code_interpreter_tool import CodeInterpreterTool
from .composio_tool.composio_tool import ComposioTool
from .csv_search_tool.csv_search_tool import CSVSearchTool
from .directory_read_tool.directory_read_tool import DirectoryReadTool
from .directory_search_tool.directory_search_tool import DirectorySearchTool
from .docx_search_tool.docx_search_tool import DOCXSearchTool
from .exa_tools.exa_search_tool import EXASearchTool
from .file_read_tool.file_read_tool import FileReadTool
from .firecrawl_crawl_website_tool.firecrawl_crawl_website_tool import FirecrawlCrawlWebsiteTool
from .firecrawl_scrape_website_tool.firecrawl_scrape_website_tool import FirecrawlScrapeWebsiteTool
from .firecrawl_search_tool.firecrawl_search_tool import FirecrawlSearchTool
from .github_search_tool.github_search_tool import GithubSearchTool
from .json_search_tool.json_search_tool import JSONSearchTool
from .llamaindex_tool.llamaindex_tool import LlamaIndexTool

View File

@@ -22,6 +22,8 @@ class BaseTool(BaseModel, ABC):
"""Flag to check if the description has been updated."""
cache_function: Optional[Callable] = lambda _args, _result: True
"""Function that will be used to determine if the tool should be cached, should return a boolean. If None, the tool will be cached."""
result_as_answer: bool = False
"""Flag to check if the tool should be the final agent answer."""
@validator("args_schema", always=True, pre=True)
def _default_args_schema(cls, v: Type[V1BaseModel]) -> Type[V1BaseModel]:

View File

@@ -0,0 +1,14 @@
FROM python:3.11-slim

# Install common utilities and clean the apt cache in the SAME layer:
# a separate cleanup RUN would create a new layer on top and the cached
# package lists would still be baked into the image below it.
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    wget \
    software-properties-common \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

# Set the working directory
WORKDIR /workspace

View File

@@ -0,0 +1,29 @@
# CodeInterpreterTool
## Description
This tool is used to give the Agent the ability to run code (Python3) from the code generated by the Agent itself. The code is executed in a sandboxed environment, so it is safe to run any code.
It is incredibly useful since it allows the Agent to generate code, run it in the same environment, get the result and use it to make decisions.
## Requirements
- Docker
## Installation
Install the crewai_tools package
```shell
pip install 'crewai[tools]'
```
## Example
Remember that when using this tool, the code must be generated by the Agent itself. The code must be a Python3 code. And it will take some time for the first time to run because it needs to build the Docker image.
```python
from crewai_tools import CodeInterpreterTool
Agent(
...
tools=[CodeInterpreterTool()],
)
```

View File

@@ -0,0 +1,94 @@
import importlib.util
import os
from typing import List, Optional, Type
import docker
from crewai_tools.tools.base_tool import BaseTool
from pydantic.v1 import BaseModel, Field
class CodeInterpreterSchema(BaseModel):
    """Input for CodeInterpreterTool."""

    # Source passed to `python3 -c` inside the container; the final result
    # must be printed for the tool to capture any output.
    code: str = Field(
        ...,
        description="Python3 code used to be interpreted in the Docker container. ALWAYS PRINT the final result and the output of the code",
    )

    # pip-installable distribution names, e.g. ["numpy", "pandas"].
    libraries_used: List[str] = Field(
        ...,
        description="List of libraries used in the code with proper installing names separated by commas. Example: numpy,pandas,beautifulsoup4",
    )
class CodeInterpreterTool(BaseTool):
    """Executes agent-generated Python3 code inside a Docker sandbox.

    The sandbox image (``code-interpreter:latest``) is built on demand from
    the Dockerfile bundled with this package if it is not present locally.
    """

    name: str = "Code Interpreter"
    description: str = "Interprets Python3 code strings with a final print statement."
    args_schema: Type[BaseModel] = CodeInterpreterSchema
    # Optional default code used when `_run` receives no `code` kwarg.
    code: Optional[str] = None

    @staticmethod
    def _get_installed_package_path():
        """Return the on-disk directory of the installed crewai_tools package."""
        spec = importlib.util.find_spec("crewai_tools")
        return os.path.dirname(spec.origin)

    def _verify_docker_image(self) -> None:
        """Ensure the sandbox image exists locally, building it if needed.

        Raises:
            FileNotFoundError: If the bundled Dockerfile directory is missing.
        """
        image_tag = "code-interpreter:latest"
        client = docker.from_env()
        try:
            client.images.get(image_tag)
        except docker.errors.ImageNotFound:
            package_path = self._get_installed_package_path()
            dockerfile_path = os.path.join(package_path, "tools/code_interpreter_tool")
            if not os.path.exists(dockerfile_path):
                raise FileNotFoundError(f"Dockerfile not found in {dockerfile_path}")
            client.images.build(
                path=dockerfile_path,
                tag=image_tag,
                rm=True,
            )

    def _run(self, **kwargs) -> str:
        code = kwargs.get("code", self.code)
        libraries_used = kwargs.get("libraries_used", [])
        return self.run_code_in_docker(code, libraries_used)

    def _install_libraries(
        self, container: docker.models.containers.Container, libraries: List[str]
    ) -> None:
        """Install the requested libraries in the running container.

        Accepts a list of package names or a single comma-separated string
        (both forms are produced by agents in practice).
        """
        if isinstance(libraries, str):
            # Normalize "numpy,pandas" -> ["numpy", "pandas"]; iterating the
            # raw string would try to `pip install` each character.
            libraries = [lib.strip() for lib in libraries.split(",") if lib.strip()]
        for library in libraries:
            container.exec_run(f"pip install {library}")

    def _init_docker_container(self) -> docker.models.containers.Container:
        """Start a fresh detached sandbox container."""
        client = docker.from_env()
        return client.containers.run(
            "code-interpreter",
            detach=True,
            tty=True,
            working_dir="/workspace",
            name="code-interpreter",
        )

    def run_code_in_docker(self, code: str, libraries_used: List[str]) -> str:
        """Run `code` in a sandbox container and return its decoded output.

        On a non-zero exit code the container's output is returned wrapped in
        an error message instead of being raised.
        """
        self._verify_docker_image()
        container = self._init_docker_container()
        try:
            self._install_libraries(container, libraries_used)
            # Pass the code as a separate argv element so quotes/newlines in
            # the generated code cannot break shell quoting.
            exec_result = container.exec_run(["python3", "-c", code])
        finally:
            # Always reclaim the container, even when install/exec raises;
            # the fixed container name would otherwise block every later run.
            container.stop()
            container.remove()
        if exec_result.exit_code != 0:
            return f"Something went wrong while running the code: \n{exec_result.output.decode('utf-8')}"
        return exec_result.output.decode("utf-8")

View File

@@ -0,0 +1,72 @@
# ComposioTool Documentation
## Description
This tool is a wrapper around the composio toolset and gives your agent access to a wide variety of tools from the composio SDK.
## Installation
To incorporate this tool into your project, follow the installation instructions below:
```shell
pip install composio-core
pip install 'crewai[tools]'
```
after the installation is complete, either run `composio login` or export your composio API key as `COMPOSIO_API_KEY`.
## Example
The following example demonstrates how to initialize the tool and execute a github action:
1. Initialize toolset
```python
from composio import Action, App
from crewai_tools import ComposioTool
from crewai import Agent, Task
tools = [ComposioTool.from_action(action=Action.GITHUB_ACTIVITY_STAR_REPO_FOR_AUTHENTICATED_USER)]
```
If you don't know what action you want to use, use `from_app` and `tags` filter to get relevant actions
```python
tools = ComposioTool.from_app(App.GITHUB, tags=["important"])
```
or use `use_case` to search relevant actions
```python
tools = ComposioTool.from_app(App.GITHUB, use_case="Star a github repository")
```
2. Define agent
```python
crewai_agent = Agent(
role="Github Agent",
goal="You take action on Github using Github APIs",
backstory=(
"You are AI agent that is responsible for taking actions on Github "
"on users behalf. You need to take action on Github using Github APIs"
),
verbose=True,
tools=tools,
)
```
3. Execute task
```python
task = Task(
description="Star a repo ComposioHQ/composio on GitHub",
agent=crewai_agent,
expected_output="if the star happened",
)
task.execute()
```
* More detailed list of tools can be found [here](https://app.composio.dev)

View File

@@ -0,0 +1,122 @@
"""
Composio tools wrapper.
"""
import typing as t
import typing_extensions as te
from crewai_tools.tools.base_tool import BaseTool
class ComposioTool(BaseTool):
    """Wrapper for composio tools."""

    # Callable built by `from_action` that executes the underlying action.
    composio_action: t.Callable

    def _run(self, *args: t.Any, **kwargs: t.Any) -> t.Any:
        """Run the composio action with given arguments."""
        return self.composio_action(*args, **kwargs)

    @staticmethod
    def _check_connected_account(tool: t.Any, toolset: t.Any) -> None:
        """Check if connected account is required and if required it exists or not.

        :raises RuntimeError: When the action's app needs an account that has
            not been connected via ``composio add <app>``.
        """
        from composio import Action
        from composio.client.collections import ConnectedAccountModel

        tool = t.cast(Action, tool)
        if tool.no_auth:
            # Action works without authentication; nothing to verify.
            return

        connections = t.cast(
            t.List[ConnectedAccountModel],
            toolset.client.connected_accounts.get(),
        )
        if tool.app not in [connection.appUniqueId for connection in connections]:
            raise RuntimeError(
                f"No connected account found for app `{tool.app}`; "
                f"Run `composio add {tool.app}` to fix this"
            )

    @classmethod
    def from_action(
        cls,
        action: t.Any,
        **kwargs: t.Any,
    ) -> te.Self:
        """Wrap a composio tool as crewAI tool.

        :param action: Composio ``Action`` (or anything ``Action()`` accepts).
        :param kwargs: Extra fields forwarded to the tool constructor;
            ``entity_id`` is popped out and bound to the action call instead.
        :raises RuntimeError: If the action's app has no connected account.
        """
        # Imported lazily so composio stays an optional dependency.
        from composio import Action, ComposioToolSet
        from composio.constants import DEFAULT_ENTITY_ID
        from composio.utils.shared import json_schema_to_model

        toolset = ComposioToolSet()
        if not isinstance(action, Action):
            action = Action(action)

        action = t.cast(Action, action)
        cls._check_connected_account(
            tool=action,
            toolset=toolset,
        )
        (action_schema,) = toolset.get_action_schemas(actions=[action])
        schema = action_schema.model_dump(exclude_none=True)
        entity_id = kwargs.pop("entity_id", DEFAULT_ENTITY_ID)

        def function(**kwargs: t.Any) -> t.Dict:
            """Wrapper function for composio action."""
            return toolset.execute_action(
                action=Action(schema["name"]),
                params=kwargs,
                entity_id=entity_id,
            )

        # Mirror the action's metadata on the wrapper for introspection.
        function.__name__ = schema["name"]
        function.__doc__ = schema["description"]

        return cls(
            name=schema["name"],
            description=schema["description"],
            args_schema=json_schema_to_model(
                action_schema.parameters.model_dump(
                    exclude_none=True,
                )
            ),
            composio_action=function,
            **kwargs,
        )

    @classmethod
    def from_app(
        cls,
        *apps: t.Any,
        tags: t.Optional[t.List[str]] = None,
        use_case: t.Optional[str] = None,
        **kwargs: t.Any,
    ) -> t.List[te.Self]:
        """Create toolset from an app.

        Exactly one of ``tags`` or ``use_case`` must be provided to filter
        the apps' actions.

        :raises ValueError: If no app is given, or neither/both filters are set.
        """
        if len(apps) == 0:
            raise ValueError("You need to provide at least one app name")

        if use_case is None and tags is None:
            raise ValueError("Both `use_case` and `tags` cannot be `None`")

        if use_case is not None and tags is not None:
            raise ValueError(
                "Cannot use both `use_case` and `tags` to filter the actions"
            )

        from composio import ComposioToolSet

        toolset = ComposioToolSet()
        if use_case is not None:
            return [
                cls.from_action(action=action, **kwargs)
                for action in toolset.find_actions_by_use_case(*apps, use_case=use_case)
            ]

        return [
            cls.from_action(action=action, **kwargs)
            for action in toolset.find_actions_by_tags(*apps, tags=tags)
        ]

View File

@@ -0,0 +1,42 @@
# FirecrawlCrawlWebsiteTool
## Description
[Firecrawl](https://firecrawl.dev) is a platform for crawling and converting any website into clean markdown or structured data.
## Installation
- Get an API key from [firecrawl.dev](https://firecrawl.dev) and set it in environment variables (`FIRECRAWL_API_KEY`).
- Install the [Firecrawl SDK](https://github.com/mendableai/firecrawl) along with `crewai[tools]` package:
```
pip install firecrawl-py 'crewai[tools]'
```
## Example
Utilize the FirecrawlCrawlWebsiteTool as follows to allow your agent to crawl websites:
```python
from crewai_tools import FirecrawlCrawlWebsiteTool
tool = FirecrawlCrawlWebsiteTool(url='firecrawl.dev')
```
## Arguments
- `api_key`: Optional. Specifies Firecrawl API key. Defaults to the `FIRECRAWL_API_KEY` environment variable.
- `url`: The base URL to start crawling from.
- `page_options`: Optional.
- `onlyMainContent`: Optional. Only return the main content of the page excluding headers, navs, footers, etc.
- `includeHtml`: Optional. Include the raw HTML content of the page. Will output a html key in the response.
- `crawler_options`: Optional. Options for controlling the crawling behavior.
- `includes`: Optional. URL patterns to include in the crawl.
- `exclude`: Optional. URL patterns to exclude from the crawl.
- `generateImgAltText`: Optional. Generate alt text for images using LLMs (requires a paid plan).
- `returnOnlyUrls`: Optional. If true, returns only the URLs as a list in the crawl status. Note: the response will be a list of URLs inside the data, not a list of documents.
- `maxDepth`: Optional. Maximum depth to crawl. Depth 1 is the base URL, depth 2 includes the base URL and its direct children, and so on.
- `mode`: Optional. The crawling mode to use. Fast mode crawls 4x faster on websites without a sitemap but may not be as accurate and shouldn't be used on heavily JavaScript-rendered websites.
- `limit`: Optional. Maximum number of pages to crawl.
- `timeout`: Optional. Timeout in milliseconds for the crawling operation.

View File

@@ -0,0 +1,33 @@
from typing import Optional, Any, Type, Dict, List
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class FirecrawlCrawlWebsiteToolSchema(BaseModel):
    """Arguments accepted by FirecrawlCrawlWebsiteTool."""

    # Base URL the crawl starts from.
    url: str = Field(description="Website URL")
    # Forwarded as Firecrawl's `crawlerOptions` payload.
    crawler_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for crawling")
    # Forwarded as Firecrawl's `pageOptions` payload.
    page_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for page")
class FirecrawlCrawlWebsiteTool(BaseTool):
    """Crawls a website with the Firecrawl SDK and returns page contents."""

    name: str = "Firecrawl web crawl tool"
    description: str = "Crawl webpages using Firecrawl and return the contents"
    args_schema: Type[BaseModel] = FirecrawlCrawlWebsiteToolSchema
    api_key: Optional[str] = None
    firecrawl: Optional[Any] = None

    def __init__(self, api_key: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        # Import lazily so firecrawl-py stays an optional dependency.
        try:
            from firecrawl import FirecrawlApp  # type: ignore
        except ImportError:
            raise ImportError(
                "`firecrawl` package not found, please run `pip install firecrawl-py`"
            )
        self.firecrawl = FirecrawlApp(api_key=api_key)

    def _run(
        self,
        url: str,
        crawler_options: Optional[Dict[str, Any]] = None,
        page_options: Optional[Dict[str, Any]] = None,
    ):
        # Firecrawl's API expects camelCase option keys.
        request_payload = {
            "crawlerOptions": crawler_options,
            "pageOptions": page_options,
        }
        return self.firecrawl.crawl_url(url, request_payload)

View File

@@ -0,0 +1,38 @@
# FirecrawlScrapeWebsiteTool
## Description
[Firecrawl](https://firecrawl.dev) is a platform for crawling and converting any website into clean markdown or structured data.
## Installation
- Get an API key from [firecrawl.dev](https://firecrawl.dev) and set it in environment variables (`FIRECRAWL_API_KEY`).
- Install the [Firecrawl SDK](https://github.com/mendableai/firecrawl) along with `crewai[tools]` package:
```
pip install firecrawl-py 'crewai[tools]'
```
## Example
Utilize the FirecrawlScrapeWebsiteTool as follows to allow your agent to load websites:
```python
from crewai_tools import FirecrawlScrapeWebsiteTool
tool = FirecrawlScrapeWebsiteTool(url='firecrawl.dev')
```
## Arguments
- `api_key`: Optional. Specifies Firecrawl API key. Defaults to the `FIRECRAWL_API_KEY` environment variable.
- `url`: The URL to scrape.
- `page_options`: Optional.
- `onlyMainContent`: Optional. Only return the main content of the page excluding headers, navs, footers, etc.
- `includeHtml`: Optional. Include the raw HTML content of the page. Will output a html key in the response.
- `extractor_options`: Optional. Options for LLM-based extraction of structured information from the page content
- `mode`: The extraction mode to use, currently supports 'llm-extraction'
- `extractionPrompt`: Optional. A prompt describing what information to extract from the page
- `extractionSchema`: Optional. The schema for the data to be extracted
- `timeout`: Optional. Timeout in milliseconds for the request

View File

@@ -0,0 +1,35 @@
from typing import Optional, Any, Type, Dict
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class FirecrawlScrapeWebsiteToolSchema(BaseModel):
    """Arguments accepted by FirecrawlScrapeWebsiteTool."""

    # Single page URL to scrape.
    url: str = Field(description="Website URL")
    # Forwarded as Firecrawl's `pageOptions` payload.
    page_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for page scraping")
    # Forwarded as Firecrawl's `extractorOptions` payload (LLM extraction).
    extractor_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for data extraction")
    # Request timeout in milliseconds.
    timeout: Optional[int] = Field(default=None, description="Timeout for the scraping operation")
class FirecrawlScrapeWebsiteTool(BaseTool):
    """Scrapes a single URL with the Firecrawl SDK and returns its contents."""

    name: str = "Firecrawl web scrape tool"
    description: str = "Scrape webpages url using Firecrawl and return the contents"
    args_schema: Type[BaseModel] = FirecrawlScrapeWebsiteToolSchema
    api_key: Optional[str] = None
    firecrawl: Optional[Any] = None

    def __init__(self, api_key: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        # Import lazily so firecrawl-py stays an optional dependency.
        try:
            from firecrawl import FirecrawlApp  # type: ignore
        except ImportError:
            raise ImportError(
                "`firecrawl` package not found, please run `pip install firecrawl-py`"
            )
        self.firecrawl = FirecrawlApp(api_key=api_key)

    def _run(
        self,
        url: str,
        page_options: Optional[Dict[str, Any]] = None,
        extractor_options: Optional[Dict[str, Any]] = None,
        timeout: Optional[int] = None,
    ):
        # Firecrawl's API expects camelCase option keys.
        request_payload = {
            "pageOptions": page_options,
            "extractorOptions": extractor_options,
            "timeout": timeout,
        }
        return self.firecrawl.scrape_url(url, request_payload)

View File

@@ -0,0 +1,35 @@
# FirecrawlSearchTool
## Description
[Firecrawl](https://firecrawl.dev) is a platform for crawling and converting any website into clean markdown or structured data.
## Installation
- Get an API key from [firecrawl.dev](https://firecrawl.dev) and set it in environment variables (`FIRECRAWL_API_KEY`).
- Install the [Firecrawl SDK](https://github.com/mendableai/firecrawl) along with `crewai[tools]` package:
```
pip install firecrawl-py 'crewai[tools]'
```
## Example
Utilize the FirecrawlSearchTool as follows to allow your agent to load websites:
```python
from crewai_tools import FirecrawlSearchTool
tool = FirecrawlSearchTool(query='what is firecrawl?')
```
## Arguments
- `api_key`: Optional. Specifies Firecrawl API key. Defaults to the `FIRECRAWL_API_KEY` environment variable.
- `query`: The search query string to be used for searching.
- `page_options`: Optional. Options for result formatting.
- `onlyMainContent`: Optional. Only return the main content of the page excluding headers, navs, footers, etc.
- `includeHtml`: Optional. Include the raw HTML content of the page. Will output a html key in the response.
- `fetchPageContent`: Optional. Fetch the full content of the page.
- `search_options`: Optional. Options for controlling the crawling behavior.
- `limit`: Optional. Maximum number of pages to crawl.

View File

@@ -0,0 +1,33 @@
from typing import Optional, Any, Type, Dict, List
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class FirecrawlSearchToolSchema(BaseModel):
    """Arguments accepted by FirecrawlSearchTool."""

    # Search query string passed to Firecrawl's search endpoint.
    query: str = Field(description="Search query")
    # Forwarded as Firecrawl's `pageOptions` payload.
    page_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for result formatting")
    # Search-behavior options (e.g. `limit`).
    search_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for searching")
class FirecrawlSearchTool(BaseTool):
    """Searches the web with the Firecrawl SDK and returns the results.

    Requires the `firecrawl-py` package; when `api_key` is not given the SDK
    reads the `FIRECRAWL_API_KEY` environment variable.
    """

    name: str = "Firecrawl web search tool"
    description: str = "Search webpages using Firecrawl and return the results"
    args_schema: Type[BaseModel] = FirecrawlSearchToolSchema
    api_key: Optional[str] = None
    firecrawl: Optional[Any] = None

    def __init__(self, api_key: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        # Import lazily so firecrawl-py stays an optional dependency.
        try:
            from firecrawl import FirecrawlApp  # type: ignore
        except ImportError:
            raise ImportError(
                "`firecrawl` package not found, please run `pip install firecrawl-py`"
            )
        self.firecrawl = FirecrawlApp(api_key=api_key)

    def _run(
        self,
        query: str,
        page_options: Optional[Dict[str, Any]] = None,
        search_options: Optional[Dict[str, Any]] = None,
    ):
        # Parameter renamed from `result_options` to `search_options` to match
        # FirecrawlSearchToolSchema — the schema never supplied a
        # `result_options` kwarg, so the option silently stayed None.
        # Firecrawl's search API expects the camelCase `searchOptions` key.
        options = {
            "pageOptions": page_options,
            "searchOptions": search_options,
        }
        return self.firecrawl.search(query, options)

View File

@@ -0,0 +1,38 @@
import unittest
from unittest.mock import patch
from crewai_tools.tools.code_interpreter_tool.code_interpreter_tool import (
CodeInterpreterTool,
)
class TestCodeInterpreterTool(unittest.TestCase):
    """Unit tests for CodeInterpreterTool against a fully mocked docker SDK."""

    @patch("crewai_tools.tools.code_interpreter_tool.code_interpreter_tool.docker")
    def test_run_code_in_docker(self, docker_mock):
        """A zero exit code returns the container's decoded stdout."""
        tool = CodeInterpreterTool()
        code = "print('Hello, World!')"
        # CodeInterpreterSchema declares List[str]; a bare "numpy,pandas"
        # string would be iterated character by character at install time.
        libraries_used = ["numpy", "pandas"]
        expected_output = "Hello, World!\n"

        docker_mock.from_env().containers.run().exec_run().exit_code = 0
        docker_mock.from_env().containers.run().exec_run().output = (
            expected_output.encode()
        )

        result = tool.run_code_in_docker(code, libraries_used)

        self.assertEqual(result, expected_output)

    @patch("crewai_tools.tools.code_interpreter_tool.code_interpreter_tool.docker")
    def test_run_code_in_docker_with_error(self, docker_mock):
        """A non-zero exit code surfaces the container output in an error message."""
        tool = CodeInterpreterTool()
        code = "print(1/0)"
        libraries_used = ["numpy", "pandas"]
        expected_output = "Something went wrong while running the code: \nZeroDivisionError: division by zero\n"

        docker_mock.from_env().containers.run().exec_run().exit_code = 1
        docker_mock.from_env().containers.run().exec_run().output = (
            b"ZeroDivisionError: division by zero\n"
        )

        result = tool.run_code_in_docker(code, libraries_used)

        self.assertEqual(result, expected_output)