diff --git a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py
index f75685a49..07fef7730 100644
--- a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py
+++ b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py
@@ -18,9 +18,12 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
     name: str = "Firecrawl web crawl tool"
     description: str = "Crawl webpages using Firecrawl and return the contents"
     args_schema: Type[BaseModel] = FirecrawlCrawlWebsiteToolSchema
+    firecrawl_app: Optional["FirecrawlApp"] = None
     api_key: Optional[str] = None
-    firecrawl: Optional["FirecrawlApp"] = None
     url: Optional[str] = None
+    params: Optional[Dict[str, Any]] = None
+    poll_interval: Optional[int] = 2
+    idempotency_key: Optional[str] = None
 
     def __init__(self, api_key: Optional[str] = None, **kwargs):
         super().__init__(**kwargs)
@@ -31,29 +34,28 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
                 "`firecrawl` package not found, please run `pip install firecrawl-py`"
             )
 
-        client_api_key = api_key or os.getenv("FIRECRAWL_API_KEY")
-        if not client_api_key:
-            raise ValueError("FIRECRAWL_API_KEY is not set")
+        # Allows passing a previously created FirecrawlApp instance
+        # or builds a new one with the provided API key
+        if not self.firecrawl_app:
+            client_api_key = api_key or os.getenv("FIRECRAWL_API_KEY")
+            if not client_api_key:
+                raise ValueError(
+                    "FIRECRAWL_API_KEY is not set. Please provide it either via the constructor "
+                    "with the `api_key` argument or by setting the FIRECRAWL_API_KEY environment variable."
+                )
+            self.firecrawl_app = FirecrawlApp(api_key=client_api_key)
 
-        self.firecrawl = FirecrawlApp(api_key=client_api_key)
-
-    def _run(
-        self,
-        url: str,
-        crawler_options: Optional[Dict[str, Any]] = None,
-        page_options: Optional[Dict[str, Any]] = None,
-    ):
+    def _run(self, url: str):
         # Unless url has been previously set via constructor by the user,
-        # use the url argument provided by the agent
+        # use the url argument provided by the agent at runtime.
         base_url = self.url or url
 
-        if crawler_options is None:
-            crawler_options = {}
-        if page_options is None:
-            page_options = {}
-
-        options = {"crawlerOptions": crawler_options, "pageOptions": page_options}
-        return self.firecrawl.crawl_url(base_url, options)
+        return self.firecrawl_app.crawl_url(
+            base_url,
+            params=self.params,
+            poll_interval=self.poll_interval,
+            idempotency_key=self.idempotency_key
+        )
 
 
 try: