Fix FirecrawlScrapeWebsiteTool (#298)

* Fix FirecrawlScrapeWebsiteTool: add missing config parameter and correct Dict type annotation

- Add required config parameter when creating the tool
- Change type hint from `dict` to `Dict` to resolve Pydantic validation issues

* Update firecrawl_scrape_website_tool.py

- Remove the `Optional` wrapper from the `config` type annotation
- Remove the `timeout` field from the Pydantic schema model (`FirecrawlScrapeWebsiteToolSchema`)

* Removing config from __init__

- removing config from __init__

* Update firecrawl_scrape_website_tool.py

- removing timeout
This commit is contained in:
nicoferdi96
2025-05-07 18:34:15 +02:00
committed by GitHub
parent fd4ef4f47a
commit edd4e5bef9

View File

@@ -1,4 +1,4 @@
from typing import Any, Optional, Type from typing import Any, Optional, Type, Dict
from crewai.tools import BaseTool from crewai.tools import BaseTool
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
@@ -8,14 +8,8 @@ try:
except ImportError: except ImportError:
FirecrawlApp = Any FirecrawlApp = Any
class FirecrawlScrapeWebsiteToolSchema(BaseModel): class FirecrawlScrapeWebsiteToolSchema(BaseModel):
url: str = Field(description="Website URL") url: str = Field(description="Website URL")
timeout: Optional[int] = Field(
default=30000,
description="Timeout in milliseconds for the scraping operation. The default value is 30000.",
)
class FirecrawlScrapeWebsiteTool(BaseTool): class FirecrawlScrapeWebsiteTool(BaseTool):
""" """
@@ -31,6 +25,8 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
include_tags (list[str]): Tags to include. Default: [] include_tags (list[str]): Tags to include. Default: []
exclude_tags (list[str]): Tags to exclude. Default: [] exclude_tags (list[str]): Tags to exclude. Default: []
headers (dict): Headers to include. Default: {} headers (dict): Headers to include. Default: {}
wait_for (int): Time to wait for page to load in ms. Default: 0
json_options (dict): Options for JSON extraction. Default: None
""" """
model_config = ConfigDict( model_config = ConfigDict(
@@ -40,7 +36,7 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
description: str = "Scrape webpages using Firecrawl and return the contents" description: str = "Scrape webpages using Firecrawl and return the contents"
args_schema: Type[BaseModel] = FirecrawlScrapeWebsiteToolSchema args_schema: Type[BaseModel] = FirecrawlScrapeWebsiteToolSchema
api_key: Optional[str] = None api_key: Optional[str] = None
config: Optional[dict[str, Any]] = Field( config: Dict[str, Any] = Field(
default_factory=lambda: { default_factory=lambda: {
"formats": ["markdown"], "formats": ["markdown"],
"only_main_content": True, "only_main_content": True,