Merge pull request #38 from mishushakov/main

updated browserbase tool
This commit is contained in:
João Moura
2024-07-14 17:10:42 -07:00
committed by GitHub
2 changed files with 35 additions and 11 deletions

View File

@@ -2,11 +2,17 @@
## Description
[Browserbase](https://browserbase.com) is a serverless platform for running headless browsers. It offers advanced debugging, session recordings, stealth mode, integrated proxies, and captcha solving.
[Browserbase](https://browserbase.com) is a developer platform to reliably run, manage, and monitor headless browsers.
Power your AI data retrievals with:
- [Serverless Infrastructure](https://docs.browserbase.com/under-the-hood) providing reliable browsers to extract data from complex UIs
- [Stealth Mode](https://docs.browserbase.com/features/stealth-mode) with included fingerprinting tactics and automatic captcha solving
- [Session Debugger](https://docs.browserbase.com/features/sessions) to inspect your Browser Session with a network timeline and logs
- [Live Debug](https://docs.browserbase.com/guides/session-debug-connection/browser-remote-control) to quickly debug your automation
## Installation
- Get an API key from [browserbase.com](https://browserbase.com) and set it in environment variables (`BROWSERBASE_API_KEY`).
- Get an API key and Project ID from [browserbase.com](https://browserbase.com) and set them as environment variables (`BROWSERBASE_API_KEY`, `BROWSERBASE_PROJECT_ID`).
- Install the [Browserbase SDK](http://github.com/browserbase/python-sdk) along with the `crewai[tools]` package:
```
@@ -25,5 +31,8 @@ tool = BrowserbaseLoadTool()
## Arguments
- `api_key`: Optional. Specifies Browserbase API key. Default is the `BROWSERBASE_API_KEY` environment variable.
- `text_content`: Optional. Load pages as readable text. Default is `False`.
- `api_key`: Optional. Browserbase API key. Default is the `BROWSERBASE_API_KEY` env variable.
- `project_id`: Optional. Browserbase Project ID. Default is the `BROWSERBASE_PROJECT_ID` env variable.
- `text_content`: Optional. Retrieve only text content. Default is `False`.
- `session_id`: Optional. Provide an existing Session ID.
- `proxy`: Optional. Enable/Disable Proxies.

View File

@@ -10,20 +10,35 @@ class BrowserbaseLoadTool(BaseTool):
description: str = "Load webpages url in a headless browser using Browserbase and return the contents"
args_schema: Type[BaseModel] = BrowserbaseLoadToolSchema
api_key: Optional[str] = None
project_id: Optional[str] = None
text_content: Optional[bool] = False
session_id: Optional[str] = None
proxy: Optional[bool] = None
browserbase: Optional[Any] = None
def __init__(self, api_key: Optional[str] = None, text_content: Optional[bool] = False, **kwargs):
def __init__(
self,
api_key: Optional[str] = None,
project_id: Optional[str] = None,
text_content: Optional[bool] = False,
session_id: Optional[str] = None,
proxy: Optional[bool] = None,
**kwargs,
):
super().__init__(**kwargs)
try:
from browserbase import Browserbase # type: ignore
from browserbase import Browserbase # type: ignore
except ImportError:
raise ImportError(
"`browserbase` package not found, please run `pip install browserbase`"
)
raise ImportError(
"`browserbase` package not found, please run `pip install browserbase`"
)
self.browserbase = Browserbase(api_key=api_key)
self.browserbase = Browserbase(api_key, project_id)
self.text_content = text_content
self.session_id = session_id
self.proxy = proxy
def _run(self, url: str):
return self.browserbase.load_url(url, text_content=self.text_content)
return self.browserbase.load_url(
url, self.text_content, self.session_id, self.proxy
)