mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-10 00:28:31 +00:00
Fix FirecrawlScrapeWebsiteTool (#298)
* fix FirecrawlScrapeWebsiteTool: add missing config parameter and correct Dict type annotation
  - Add required config parameter when creating the tool
  - Change type hint from `dict` to `Dict` to resolve Pydantic validation issues
* Update firecrawl_scrape_website_tool.py
  - removing optional config
  - removing timeout from Pydantic model
* Removing config from __init__
  - removing config from __init__
* Update firecrawl_scrape_website_tool.py
  - removing timeout
This commit is contained in:
@@ -1,4 +1,4 @@
|
|||||||
from typing import Any, Optional, Type
|
from typing import Any, Optional, Type, Dict
|
||||||
|
|
||||||
from crewai.tools import BaseTool
|
from crewai.tools import BaseTool
|
||||||
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
|
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
|
||||||
@@ -8,14 +8,8 @@ try:
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
FirecrawlApp = Any
|
FirecrawlApp = Any
|
||||||
|
|
||||||
|
|
||||||
class FirecrawlScrapeWebsiteToolSchema(BaseModel):
|
class FirecrawlScrapeWebsiteToolSchema(BaseModel):
|
||||||
url: str = Field(description="Website URL")
|
url: str = Field(description="Website URL")
|
||||||
timeout: Optional[int] = Field(
|
|
||||||
default=30000,
|
|
||||||
description="Timeout in milliseconds for the scraping operation. The default value is 30000.",
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class FirecrawlScrapeWebsiteTool(BaseTool):
|
class FirecrawlScrapeWebsiteTool(BaseTool):
|
||||||
"""
|
"""
|
||||||
@@ -31,6 +25,8 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
|
|||||||
include_tags (list[str]): Tags to include. Default: []
|
include_tags (list[str]): Tags to include. Default: []
|
||||||
exclude_tags (list[str]): Tags to exclude. Default: []
|
exclude_tags (list[str]): Tags to exclude. Default: []
|
||||||
headers (dict): Headers to include. Default: {}
|
headers (dict): Headers to include. Default: {}
|
||||||
|
wait_for (int): Time to wait for page to load in ms. Default: 0
|
||||||
|
json_options (dict): Options for JSON extraction. Default: None
|
||||||
"""
|
"""
|
||||||
|
|
||||||
model_config = ConfigDict(
|
model_config = ConfigDict(
|
||||||
@@ -40,7 +36,7 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
|
|||||||
description: str = "Scrape webpages using Firecrawl and return the contents"
|
description: str = "Scrape webpages using Firecrawl and return the contents"
|
||||||
args_schema: Type[BaseModel] = FirecrawlScrapeWebsiteToolSchema
|
args_schema: Type[BaseModel] = FirecrawlScrapeWebsiteToolSchema
|
||||||
api_key: Optional[str] = None
|
api_key: Optional[str] = None
|
||||||
config: Optional[dict[str, Any]] = Field(
|
config: Dict[str, Any] = Field(
|
||||||
default_factory=lambda: {
|
default_factory=lambda: {
|
||||||
"formats": ["markdown"],
|
"formats": ["markdown"],
|
||||||
"only_main_content": True,
|
"only_main_content": True,
|
||||||
|
|||||||
Reference in New Issue
Block a user