Merge pull request #60 from MULTI-ON/main

add multion tool
This commit is contained in:
João Moura
2024-07-12 09:10:01 -07:00
committed by GitHub
5 changed files with 150 additions and 0 deletions

View File

@@ -16,6 +16,7 @@ from .tools import (
JSONSearchTool,
LlamaIndexTool,
MDXSearchTool,
MultiOnTool,
PDFSearchTool,
PGSearchTool,
RagTool,

View File

@@ -15,6 +15,7 @@ from .github_search_tool.github_search_tool import GithubSearchTool
from .json_search_tool.json_search_tool import JSONSearchTool
from .llamaindex_tool.llamaindex_tool import LlamaIndexTool
from .mdx_seach_tool.mdx_search_tool import MDXSearchTool
from .multion_tool.multion_tool import MultiOnTool
from .pdf_search_tool.pdf_search_tool import PDFSearchTool
from .pg_seach_tool.pg_search_tool import PGSearchTool
from .rag.rag_tool import RagTool

View File

@@ -0,0 +1,54 @@
# MultiOnTool Documentation
## Description
The MultiOnTool, integrated within the crewai_tools package, empowers CrewAI agents with the capability to navigate and interact with the web through natural language instructions. Leveraging the Multion API, this tool facilitates seamless web browsing, making it an essential asset for projects requiring dynamic web data interaction.
## Installation
Ensure the `crewai[tools]` package is installed in your environment to use the MultiOnTool. If it's not already installed, you can add it using the command below:
```shell
pip install 'crewai[tools]'
```
## Example
The following example demonstrates how to initialize the tool and execute a search with a given query:
```python
from crewai import Agent, Task, Crew
from crewai_tools import MultiOnTool
# Initialize the tool from a MultiOn Tool
multion_tool = MultiOnTool(api_key= "YOUR_MULTION_API_KEY", local=False)
Browser = Agent(
role="Browser Agent",
goal="control web browsers using natural language ",
backstory="An expert browsing agent.",
tools=[multion_remote_tool],
verbose=True,
)
# example task to search and summarize news
browse = Task(
description="Summarize the top 3 trending AI News headlines",
expected_output="A summary of the top 3 trending AI News headlines",
agent=Browser,
)
crew = Crew(agents=[Browser], tasks=[browse])
crew.kickoff()
```
## Arguments
- `api_key`: Specifies Browserbase API key. Defaults is the `BROWSERBASE_API_KEY` environment variable.
- `local`: Use the local flag set as "true" to run the agent locally on your browser. Make sure the multion browser extension is installed and API Enabled is checked.
- `max_steps`: Optional. Set the max_steps the multion agent can take for a command
## Steps to Get Started
To effectively use the `MultiOnTool`, follow these steps:
1. **Install CrewAI**: Confirm that the `crewai[tools]` package is installed in your Python environment.
2. **Install and use MultiOn**: Follow MultiOn documentation for installing the MultiOn Browser Extension (https://docs.multion.ai/learn/browser-extension).
3. **Enable API Usage**: Click on the MultiOn extension in the extensions folder of your browser (not the hovering MultiOn icon on the web page) to open the extension configurations. Click the API Enabled toggle to enable the API

View File

@@ -0,0 +1,29 @@
import os
from crewai import Agent, Crew, Task
from multion_tool import MultiOnTool
os.environ["OPENAI_API_KEY"] = "Your Key"
multion_browse_tool = MultiOnTool(api_key="Your Key")
# Create a new agent
Browser = Agent(
role="Browser Agent",
goal="control web browsers using natural language ",
backstory="An expert browsing agent.",
tools=[multion_browse_tool],
verbose=True,
)
# Define tasks
browse = Task(
description="Summarize the top 3 trending AI News headlines",
expected_output="A summary of the top 3 trending AI News headlines",
agent=Browser,
)
crew = Crew(agents=[Browser], tasks=[browse])
crew.kickoff()

View File

@@ -0,0 +1,65 @@
"""Multion tool spec."""
from typing import Any, Optional
from crewai_tools.tools.base_tool import BaseTool
class MultiOnTool(BaseTool):
"""Tool to wrap MultiOn Browse Capabilities."""
name: str = "Multion Browse Tool"
description: str = """Multion gives the ability for LLMs to control web browsers using natural language instructions.
If the status is 'CONTINUE', reissue the same instruction to continue execution
"""
multion: Optional[Any] = None
session_id: Optional[str] = None
local: bool = False
max_steps: int = 3
def __init__(
self,
api_key: Optional[str] = None,
local: bool = False,
max_steps: int = 3,
**kwargs,
):
super().__init__(**kwargs)
try:
from multion.client import MultiOn # type: ignore
except ImportError:
raise ImportError(
"`multion` package not found, please run `pip install multion`"
)
self.session_id = None
self.local = local
self.multion = MultiOn(api_key=api_key)
self.max_steps = max_steps
def _run(
self,
cmd: str,
*args: Any,
**kwargs: Any,
) -> str:
"""
Run the Multion client with the given command.
Args:
cmd (str): The detailed and specific natural language instructrion for web browsing
*args (Any): Additional arguments to pass to the Multion client
**kwargs (Any): Additional keyword arguments to pass to the Multion client
"""
browse = self.multion.browse(
cmd=cmd,
session_id=self.session_id,
local=self.local,
max_steps=self.max_steps,
*args,
**kwargs,
)
self.session_id = browse.session_id
return browse.message + "\n\n STATUS: " + browse.status