Refactor: Clean up FirecrawlCrawlWebsiteTool schema field descriptions and formatting for improved readability

This commit is contained in:
lorenzejay
2025-04-04 11:42:32 -07:00
parent e0adb4695c
commit 89394ef3e3

View File

@@ -1,4 +1,4 @@
from typing import Any, Dict, Optional, Type
from typing import Any, Optional, Type
from crewai.tools import BaseTool
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
@@ -14,16 +14,19 @@ class FirecrawlCrawlWebsiteToolSchema(BaseModel):
url: str = Field(description="Website URL")
maxDepth: Optional[int] = Field(
default=2,
description="Maximum depth to crawl. Depth 1 is the base URL, depth 2 includes the base URL and its direct children and so on.")
description="Maximum depth to crawl. Depth 1 is the base URL, depth 2 includes the base URL and its direct children and so on.",
)
limit: Optional[int] = Field(
default=100,
description="Maximum number of pages to crawl.")
default=100, description="Maximum number of pages to crawl."
)
allowExternalLinks: Optional[bool] = Field(
default=False,
description="Allows the crawler to follow links that point to external domains.")
description="Allows the crawler to follow links that point to external domains.",
)
formats: Optional[list[str]] = Field(
default=["markdown", "screenshot", "links"],
description="Formats for the page's content to be returned (eg. markdown, html, screenshot, links).")
description="Formats for the page's content to be returned (eg. markdown, html, screenshot, links).",
)
timeout: Optional[int] = Field(
default=30000,
description="Timeout in milliseconds for the crawling operation. The default value is 30000.",
@@ -39,7 +42,6 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
args_schema: Type[BaseModel] = FirecrawlCrawlWebsiteToolSchema
api_key: Optional[str] = None
_firecrawl: Optional["FirecrawlApp"] = PrivateAttr(None)
def __init__(self, api_key: Optional[str] = None, **kwargs):
super().__init__(**kwargs)
@@ -91,20 +93,20 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
"scrapeOptions": {
"formats": ["markdown", "screenshot", "links"],
"onlyMainContent": True,
"timeout": DEFAULT_TIMEOUT
}
"timeout": DEFAULT_TIMEOUT,
},
}
# Add default options not present as parameters
crawling_options = DEFAULT_CRAWLING_OPTIONS
# Update the values of parameters present
crawling_options["maxDepth"] = maxDepth
crawling_options["limit"] = limit
crawling_options["allowExternalLinks"] = allowExternalLinks
crawling_options["scrapeOptions"]["formats"] = formats
crawling_options["scrapeOptions"]["timeout"] = timeout
return self._firecrawl.crawl_url(url, crawling_options)