mirror of https://github.com/crewAIInc/crewAI.git, synced 2026-01-09 16:18:30 +00:00
Refactor: Clean up FirecrawlCrawlWebsiteTool schema field descriptions and formatting for improved readability
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Type
+from typing import Any, Optional, Type

 from crewai.tools import BaseTool
 from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
@@ -14,16 +14,19 @@ class FirecrawlCrawlWebsiteToolSchema(BaseModel):
     url: str = Field(description="Website URL")
     maxDepth: Optional[int] = Field(
         default=2,
-        description="Maximum depth to crawl. Depth 1 is the base URL, depth 2 includes the base URL and its direct children and so on.")
+        description="Maximum depth to crawl. Depth 1 is the base URL, depth 2 includes the base URL and its direct children and so on.",
+    )
     limit: Optional[int] = Field(
-        default=100,
-        description="Maximum number of pages to crawl.")
+        default=100, description="Maximum number of pages to crawl."
+    )
     allowExternalLinks: Optional[bool] = Field(
         default=False,
-        description="Allows the crawler to follow links that point to external domains.")
+        description="Allows the crawler to follow links that point to external domains.",
+    )
     formats: Optional[list[str]] = Field(
         default=["markdown", "screenshot", "links"],
-        description="Formats for the page's content to be returned (eg. markdown, html, screenshot, links).")
+        description="Formats for the page's content to be returned (eg. markdown, html, screenshot, links).",
+    )
     timeout: Optional[int] = Field(
         default=30000,
         description="Timeout in milliseconds for the crawling operation. The default value is 30000.",
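For context, here is a minimal sketch (not the crewAI source) of how the reformatted schema behaves when validating tool arguments. The field names, defaults, and descriptions come from the hunk above; the stand-in class and the example values are illustrative only.

from typing import Optional

from pydantic import BaseModel, Field


# Stand-in mirroring the fields shown in the hunk above; the real class
# lives in crewAI and may differ in detail.
class FirecrawlCrawlWebsiteToolSchema(BaseModel):
    url: str = Field(description="Website URL")
    maxDepth: Optional[int] = Field(default=2)
    limit: Optional[int] = Field(default=100)
    allowExternalLinks: Optional[bool] = Field(default=False)
    formats: Optional[list[str]] = Field(default=["markdown", "screenshot", "links"])
    timeout: Optional[int] = Field(default=30000)


# Unspecified fields fall back to their defaults during validation.
args = FirecrawlCrawlWebsiteToolSchema(url="https://example.com", maxDepth=1)
print(args.model_dump())
# -> roughly {'url': 'https://example.com', 'maxDepth': 1, 'limit': 100, ...}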
@@ -39,7 +42,6 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
     args_schema: Type[BaseModel] = FirecrawlCrawlWebsiteToolSchema
     api_key: Optional[str] = None
     _firecrawl: Optional["FirecrawlApp"] = PrivateAttr(None)

-
     def __init__(self, api_key: Optional[str] = None, **kwargs):
         super().__init__(**kwargs)
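Given the constructor shown in this hunk (an api_key parameter plus pass-through **kwargs), a hypothetical usage sketch follows. The crewai_tools import path, the FIRECRAWL_API_KEY variable name, and the run() call are assumptions about typical crewAI tool usage, not part of this diff.

import os

from crewai_tools import FirecrawlCrawlWebsiteTool  # import path assumed

# api_key matches the constructor parameter above; the env var name is an assumption
tool = FirecrawlCrawlWebsiteTool(api_key=os.getenv("FIRECRAWL_API_KEY"))

# Keyword arguments mirror the FirecrawlCrawlWebsiteToolSchema fields
result = tool.run(url="https://example.com", maxDepth=1, limit=10)
print(result)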
@@ -91,20 +93,20 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
             "scrapeOptions": {
                 "formats": ["markdown", "screenshot", "links"],
                 "onlyMainContent": True,
-                "timeout": DEFAULT_TIMEOUT
-            }
+                "timeout": DEFAULT_TIMEOUT,
+            },
         }

         # Add default options not present as parameters
         crawling_options = DEFAULT_CRAWLING_OPTIONS

         # Update the values of parameters present
         crawling_options["maxDepth"] = maxDepth
         crawling_options["limit"] = limit
         crawling_options["allowExternalLinks"] = allowExternalLinks
         crawling_options["scrapeOptions"]["formats"] = formats
         crawling_options["scrapeOptions"]["timeout"] = timeout

         return self._firecrawl.crawl_url(url, crawling_options)
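For comparison, a rough sketch of the direct Firecrawl SDK call that the last hunk delegates to via self._firecrawl.crawl_url(url, crawling_options). The firecrawl-py import, the placeholder key, and the accepted option keys vary by SDK version, so treat the snippet as illustrative rather than canonical.

from firecrawl import FirecrawlApp  # firecrawl-py SDK; API varies by version

app = FirecrawlApp(api_key="fc-...")  # placeholder key

# Options mirror DEFAULT_CRAWLING_OPTIONS as assembled in the hunk above
result = app.crawl_url(
    "https://example.com",
    {
        "maxDepth": 2,
        "limit": 100,
        "allowExternalLinks": False,
        "scrapeOptions": {
            "formats": ["markdown", "screenshot", "links"],
            "onlyMainContent": True,
            "timeout": 30000,
        },
    },
)
print(result)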