Fix FirecrawlScrapeWebsiteTool (#298)

* fix FirecrawlScrapeWebsiteTool: add missing config parameter and correct Dict type annotation

- Add required config parameter when creating the tool
- Change the `config` type hint from the built-in `dict` to `typing.Dict` to resolve Pydantic validation issues

* Update firecrawl_scrape_website_tool.py

- Remove the `Optional` wrapper from the `config` type annotation
- Remove the `timeout` field from the Pydantic args schema (`FirecrawlScrapeWebsiteToolSchema`)

* Removing config from __init__

- removing config from __init__

* Update firecrawl_scrape_website_tool.py

- removing timeout
This commit is contained in:
nicoferdi96
2025-05-07 18:34:15 +02:00
committed by GitHub
parent fd4ef4f47a
commit edd4e5bef9

View File

@@ -1,4 +1,4 @@
from typing import Any, Optional, Type
from typing import Any, Optional, Type, Dict
from crewai.tools import BaseTool
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
@@ -8,14 +8,8 @@ try:
except ImportError:
FirecrawlApp = Any
class FirecrawlScrapeWebsiteToolSchema(BaseModel):
url: str = Field(description="Website URL")
timeout: Optional[int] = Field(
default=30000,
description="Timeout in milliseconds for the scraping operation. The default value is 30000.",
)
class FirecrawlScrapeWebsiteTool(BaseTool):
"""
@@ -31,6 +25,8 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
include_tags (list[str]): Tags to include. Default: []
exclude_tags (list[str]): Tags to exclude. Default: []
headers (dict): Headers to include. Default: {}
wait_for (int): Time to wait for page to load in ms. Default: 0
json_options (dict): Options for JSON extraction. Default: None
"""
model_config = ConfigDict(
@@ -40,7 +36,7 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
description: str = "Scrape webpages using Firecrawl and return the contents"
args_schema: Type[BaseModel] = FirecrawlScrapeWebsiteToolSchema
api_key: Optional[str] = None
config: Optional[dict[str, Any]] = Field(
config: Dict[str, Any] = Field(
default_factory=lambda: {
"formats": ["markdown"],
"only_main_content": True,