Refactor: Clean up FirecrawlCrawlWebsiteTool schema field descriptions and formatting for improved readability

This commit is contained in:
lorenzejay
2025-04-04 11:42:32 -07:00
parent e0adb4695c
commit 89394ef3e3

View File

@@ -1,4 +1,4 @@
from typing import Any, Optional, Type
from crewai.tools import BaseTool
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
@@ -14,16 +14,19 @@ class FirecrawlCrawlWebsiteToolSchema(BaseModel):
    url: str = Field(description="Website URL")
    maxDepth: Optional[int] = Field(
        default=2,
        description="Maximum depth to crawl. Depth 1 is the base URL, depth 2 includes the base URL and its direct children and so on.",
    )
    limit: Optional[int] = Field(
        default=100, description="Maximum number of pages to crawl."
    )
    allowExternalLinks: Optional[bool] = Field(
        default=False,
        description="Allows the crawler to follow links that point to external domains.",
    )
    formats: Optional[list[str]] = Field(
        default=["markdown", "screenshot", "links"],
        description="Formats for the page's content to be returned (eg. markdown, html, screenshot, links).",
    )
    timeout: Optional[int] = Field(
        default=30000,
        description="Timeout in milliseconds for the crawling operation. The default value is 30000.",
@@ -39,7 +42,6 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
    args_schema: Type[BaseModel] = FirecrawlCrawlWebsiteToolSchema
    api_key: Optional[str] = None
    _firecrawl: Optional["FirecrawlApp"] = PrivateAttr(None)

    def __init__(self, api_key: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
@@ -91,20 +93,20 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
            "scrapeOptions": {
                "formats": ["markdown", "screenshot", "links"],
                "onlyMainContent": True,
                "timeout": DEFAULT_TIMEOUT,
            },
        }

        # Add default options not present as parameters
        crawling_options = DEFAULT_CRAWLING_OPTIONS
        # Update the values of parameters present
        crawling_options["maxDepth"] = maxDepth
        crawling_options["limit"] = limit
        crawling_options["allowExternalLinks"] = allowExternalLinks
        crawling_options["scrapeOptions"]["formats"] = formats
        crawling_options["scrapeOptions"]["timeout"] = timeout

        return self._firecrawl.crawl_url(url, crawling_options)