Use proper options and accept custom FirecrawlApp

This commit is contained in:
Carlos Souza
2024-12-13 11:20:08 -05:00
parent 2cb33b18e5
commit 3a095183c5

View File

@@ -18,9 +18,12 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
name: str = "Firecrawl web crawl tool"
description: str = "Crawl webpages using Firecrawl and return the contents"
args_schema: Type[BaseModel] = FirecrawlCrawlWebsiteToolSchema
firecrawl_app: Optional["FirecrawlApp"] = None
api_key: Optional[str] = None
firecrawl: Optional["FirecrawlApp"] = None
url: Optional[str] = None
params: Optional[Dict[str, Any]] = None
poll_interval: Optional[int] = 2
idempotency_key: Optional[str] = None
def __init__(self, api_key: Optional[str] = None, **kwargs):
super().__init__(**kwargs)
@@ -31,29 +34,28 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
"`firecrawl` package not found, please run `pip install firecrawl-py`"
)
client_api_key = api_key or os.getenv("FIRECRAWL_API_KEY")
if not client_api_key:
raise ValueError("FIRECRAWL_API_KEY is not set")
# Allows passing a previously created FirecrawlApp instance
# or builds a new one with the provided API key
if not self.firecrawl_app:
client_api_key = api_key or os.getenv("FIRECRAWL_API_KEY")
if not client_api_key:
raise ValueError(
"FIRECRAWL_API_KEY is not set. Please provide it either via the constructor "
"with the `api_key` argument or by setting the FIRECRAWL_API_KEY environment variable."
)
self.firecrawl_app = FirecrawlApp(api_key=client_api_key)
self.firecrawl = FirecrawlApp(api_key=client_api_key)
def _run(
self,
url: str,
crawler_options: Optional[Dict[str, Any]] = None,
page_options: Optional[Dict[str, Any]] = None,
):
def _run(self, url: str):
# Unless url has been previously set via constructor by the user,
# use the url argument provided by the agent
# use the url argument provided by the agent at runtime.
base_url = self.url or url
if crawler_options is None:
crawler_options = {}
if page_options is None:
page_options = {}
options = {"crawlerOptions": crawler_options, "pageOptions": page_options}
return self.firecrawl.crawl_url(base_url, options)
return self.firecrawl_app.crawl_url(
base_url,
params=self.params,
poll_interval=self.poll_interval,
idempotency_key=self.idempotency_key
)
try: