Refactor: Clean up FirecrawlCrawlWebsiteTool schema field descriptions and formatting for improved readability

This commit is contained in:
lorenzejay
2025-04-04 11:42:32 -07:00
parent e0adb4695c
commit 89394ef3e3

View File

@@ -1,4 +1,4 @@
from typing import Any, Dict, Optional, Type from typing import Any, Optional, Type
from crewai.tools import BaseTool from crewai.tools import BaseTool
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
@@ -14,16 +14,19 @@ class FirecrawlCrawlWebsiteToolSchema(BaseModel):
url: str = Field(description="Website URL") url: str = Field(description="Website URL")
maxDepth: Optional[int] = Field( maxDepth: Optional[int] = Field(
default=2, default=2,
description="Maximum depth to crawl. Depth 1 is the base URL, depth 2 includes the base URL and its direct children and so on.") description="Maximum depth to crawl. Depth 1 is the base URL, depth 2 includes the base URL and its direct children and so on.",
)
limit: Optional[int] = Field( limit: Optional[int] = Field(
default=100, default=100, description="Maximum number of pages to crawl."
description="Maximum number of pages to crawl.") )
allowExternalLinks: Optional[bool] = Field( allowExternalLinks: Optional[bool] = Field(
default=False, default=False,
description="Allows the crawler to follow links that point to external domains.") description="Allows the crawler to follow links that point to external domains.",
)
formats: Optional[list[str]] = Field( formats: Optional[list[str]] = Field(
default=["markdown", "screenshot", "links"], default=["markdown", "screenshot", "links"],
description="Formats for the page's content to be returned (eg. markdown, html, screenshot, links).") description="Formats for the page's content to be returned (eg. markdown, html, screenshot, links).",
)
timeout: Optional[int] = Field( timeout: Optional[int] = Field(
default=30000, default=30000,
description="Timeout in milliseconds for the crawling operation. The default value is 30000.", description="Timeout in milliseconds for the crawling operation. The default value is 30000.",
@@ -40,7 +43,6 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
api_key: Optional[str] = None api_key: Optional[str] = None
_firecrawl: Optional["FirecrawlApp"] = PrivateAttr(None) _firecrawl: Optional["FirecrawlApp"] = PrivateAttr(None)
def __init__(self, api_key: Optional[str] = None, **kwargs): def __init__(self, api_key: Optional[str] = None, **kwargs):
super().__init__(**kwargs) super().__init__(**kwargs)
self.api_key = api_key self.api_key = api_key
@@ -91,8 +93,8 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
"scrapeOptions": { "scrapeOptions": {
"formats": ["markdown", "screenshot", "links"], "formats": ["markdown", "screenshot", "links"],
"onlyMainContent": True, "onlyMainContent": True,
"timeout": DEFAULT_TIMEOUT "timeout": DEFAULT_TIMEOUT,
} },
} }
# Add default options not present as parameters # Add default options not present as parameters