From 89394ef3e3d60966252b9c3782118594527daa6a Mon Sep 17 00:00:00 2001 From: lorenzejay Date: Fri, 4 Apr 2025 11:42:32 -0700 Subject: [PATCH] Refactor: Clean up FirecrawlCrawlWebsiteTool schema field descriptions and formatting for improved readability --- .../firecrawl_crawl_website_tool.py | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py index 82bd913cd..f91ad3184 100644 --- a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py +++ b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, Optional, Type +from typing import Any, Optional, Type from crewai.tools import BaseTool from pydantic import BaseModel, ConfigDict, Field, PrivateAttr @@ -14,16 +14,19 @@ class FirecrawlCrawlWebsiteToolSchema(BaseModel): url: str = Field(description="Website URL") maxDepth: Optional[int] = Field( default=2, - description="Maximum depth to crawl. Depth 1 is the base URL, depth 2 includes the base URL and its direct children and so on.") + description="Maximum depth to crawl. Depth 1 is the base URL, depth 2 includes the base URL and its direct children and so on.", + ) limit: Optional[int] = Field( - default=100, - description="Maximum number of pages to crawl.") + default=100, description="Maximum number of pages to crawl." + ) allowExternalLinks: Optional[bool] = Field( default=False, - description="Allows the crawler to follow links that point to external domains.") + description="Allows the crawler to follow links that point to external domains.", + ) formats: Optional[list[str]] = Field( default=["markdown", "screenshot", "links"], - description="Formats for the page's content to be returned (eg. markdown, html, screenshot, links).") + description="Formats for the page's content to be returned (eg. markdown, html, screenshot, links).", + ) timeout: Optional[int] = Field( default=30000, description="Timeout in milliseconds for the crawling operation. The default value is 30000.", @@ -39,7 +42,6 @@ class FirecrawlCrawlWebsiteTool(BaseTool): args_schema: Type[BaseModel] = FirecrawlCrawlWebsiteToolSchema api_key: Optional[str] = None _firecrawl: Optional["FirecrawlApp"] = PrivateAttr(None) - def __init__(self, api_key: Optional[str] = None, **kwargs): super().__init__(**kwargs) @@ -91,20 +93,20 @@ class FirecrawlCrawlWebsiteTool(BaseTool): "scrapeOptions": { "formats": ["markdown", "screenshot", "links"], "onlyMainContent": True, - "timeout": DEFAULT_TIMEOUT - } + "timeout": DEFAULT_TIMEOUT, + }, } - + # Add default options not present as parameters crawling_options = DEFAULT_CRAWLING_OPTIONS - + # Update the values of parameters present crawling_options["maxDepth"] = maxDepth crawling_options["limit"] = limit crawling_options["allowExternalLinks"] = allowExternalLinks crawling_options["scrapeOptions"]["formats"] = formats crawling_options["scrapeOptions"]["timeout"] = timeout - + return self._firecrawl.crawl_url(url, crawling_options)