From 5af2108307bcc7c2cca23c154acf02093a31540f Mon Sep 17 00:00:00 2001 From: Shady Ali <121682078+SHIXOOM@users.noreply.github.com> Date: Sat, 8 Mar 2025 09:35:23 +0200 Subject: [PATCH] Fix: FireCrawl FirecrawlCrawlWebsiteTool for crawling. FireCrawl API does not recognize sent parameters (HTTPError: Unexpected error during start crawl job: Status code 400. Bad Request - [{'code': 'unrecognized_keys', 'keys': ['crawlerOptions', 'timeout'], 'path': [], 'message': 'Unrecognized key in body -- please review the v1 API documentation for request body changes'}]) because it has been updated to v1. I updated the sent parameters to match v1 and updated their description in the README file --- .../firecrawl_crawl_website_tool/README.md | 11 ++++------ .../firecrawl_crawl_website_tool.py | 21 +++++++++++++------ 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/crewai_tools/tools/firecrawl_crawl_website_tool/README.md b/src/crewai_tools/tools/firecrawl_crawl_website_tool/README.md index 46d011602..f0bf66918 100644 --- a/src/crewai_tools/tools/firecrawl_crawl_website_tool/README.md +++ b/src/crewai_tools/tools/firecrawl_crawl_website_tool/README.md @@ -31,12 +31,9 @@ tool = FirecrawlCrawlWebsiteTool(url='firecrawl.dev') - `onlyMainContent`: Optional. Only return the main content of the page excluding headers, navs, footers, etc. - `includeHtml`: Optional. Include the raw HTML content of the page. Will output a html key in the response. - `crawler_options`: Optional. Options for controlling the crawling behavior. - - `includes`: Optional. URL patterns to include in the crawl. - - `exclude`: Optional. URL patterns to exclude from the crawl. - - `generateImgAltText`: Optional. Generate alt text for images using LLMs (requires a paid plan). - - `returnOnlyUrls`: Optional. If true, returns only the URLs as a list in the crawl status. Note: the response will be a list of URLs inside the data, not a list of documents. - - `maxDepth`: Optional. 
Maximum depth to crawl. Depth 1 is the base URL, depth 2 includes the base URL and its direct children, and so on. - - `mode`: Optional. The crawling mode to use. Fast mode crawls 4x faster on websites without a sitemap but may not be as accurate and shouldn't be used on heavily JavaScript-rendered websites. + - `maxDepth`: Optional. Maximum depth to crawl. Depth 1 is the base URL, depth 2 includes the base URL and its direct children, and so on. - `limit`: Optional. Maximum number of pages to crawl. - - `timeout`: Optional. Timeout in milliseconds for the crawling operation. + - `scrapeOptions`: Optional. Additional options for controlling the crawler. + - `formats`: Optional. Formats for the page's content to be returned (e.g. markdown, html, screenshot, links). + - `timeout`: Optional. Timeout in milliseconds for the crawling operation. diff --git a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py index b95199c84..878063953 100644 --- a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py +++ b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py @@ -68,13 +68,22 @@ class FirecrawlCrawlWebsiteTool(BaseTool): timeout: Optional[int] = 30000, ): if crawler_options is None: - crawler_options = {} + crawler_options = { + "maxDepth": 2, + "limit": 10, + "scrapeOptions": { + # same options as in /scrape + "formats": ["markdown", "screenshot", "links"], + "timeout": timeout + } + } - options = { - "crawlerOptions": crawler_options, - "timeout": timeout, - } - return self._firecrawl.crawl_url(url, options) + + else: + crawler_options["scrapeOptions"]["timeout"] = timeout + + + return self._firecrawl.crawl_url(url, crawler_options) try: