From 180cc38330e4eaacfdb0bb168f5812e94eeda85e Mon Sep 17 00:00:00 2001 From: Lorenze Jay <63378463+lorenzejay@users.noreply.github.com> Date: Fri, 27 Jun 2025 11:27:48 -0700 Subject: [PATCH] refactor: update Firecrawl tools to improve configuration and error handling (#351) - Added TYPE_CHECKING imports for FirecrawlApp to enhance type safety. - Updated configuration keys in FirecrawlCrawlWebsiteTool and FirecrawlScrapeWebsiteTool to camelCase for consistency. - Introduced error handling in the _run methods of both tools to ensure FirecrawlApp is properly initialized before usage. - Adjusted parameters passed to the crawl_url, scrape_url, and search methods to use 'params' instead of unpacking the config dictionary directly. --- .../firecrawl_crawl_website_tool.py | 36 +++++++++++-------- .../firecrawl_scrape_website_tool.py | 32 +++++++++++------ .../firecrawl_search_tool.py | 2 +- 3 files changed, 44 insertions(+), 26 deletions(-) diff --git a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py index 6642fbd54..0d2ef325e 100644 --- a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py +++ b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py @@ -1,12 +1,17 @@ -from typing import Any, Optional, Type, List +from typing import Any, Optional, Type, List, TYPE_CHECKING from crewai.tools import BaseTool from pydantic import BaseModel, ConfigDict, Field, PrivateAttr +if TYPE_CHECKING: + from firecrawl import FirecrawlApp + try: - from firecrawl import FirecrawlApp, ScrapeOptions + from firecrawl import FirecrawlApp + + FIRECRAWL_AVAILABLE = True except ImportError: - FirecrawlApp = Any + FIRECRAWL_AVAILABLE = False class FirecrawlCrawlWebsiteToolSchema(BaseModel): @@ -42,16 +47,16 @@ class FirecrawlCrawlWebsiteTool(BaseTool): api_key: Optional[str] = None config: Optional[dict[str, Any]] = Field( 
default_factory=lambda: { - "max_depth": 2, - "ignore_sitemap": True, - "limit": 100, - "allow_backward_links": False, - "allow_external_links": False, - "scrape_options": ScrapeOptions( - formats=["markdown", "screenshot", "links"], - only_main_content=True, - timeout=30000, - ), + "maxDepth": 2, + "ignoreSitemap": True, + "limit": 10, + "allowBackwardLinks": False, + "allowExternalLinks": False, + "scrapeOptions": { + "formats": ["markdown", "screenshot", "links"], + "onlyMainContent": True, + "timeout": 10000, + }, } ) _firecrawl: Optional["FirecrawlApp"] = PrivateAttr(None) @@ -88,7 +93,10 @@ class FirecrawlCrawlWebsiteTool(BaseTool): ) def _run(self, url: str): - return self._firecrawl.crawl_url(url, **self.config) + if not self._firecrawl: + raise RuntimeError("FirecrawlApp not properly initialized") + + return self._firecrawl.crawl_url(url, poll_interval=2, params=self.config) try: diff --git a/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py b/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py index acb1c0af5..31742340d 100644 --- a/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py +++ b/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py @@ -1,16 +1,23 @@ -from typing import Any, Optional, Type, Dict, List +from typing import Any, Optional, Type, Dict, List, TYPE_CHECKING from crewai.tools import BaseTool from pydantic import BaseModel, ConfigDict, Field, PrivateAttr +if TYPE_CHECKING: + from firecrawl import FirecrawlApp + try: from firecrawl import FirecrawlApp + + FIRECRAWL_AVAILABLE = True except ImportError: - FirecrawlApp = Any + FIRECRAWL_AVAILABLE = False + class FirecrawlScrapeWebsiteToolSchema(BaseModel): url: str = Field(description="Website URL") + class FirecrawlScrapeWebsiteTool(BaseTool): """ Tool for scraping webpages using Firecrawl. To run this tool, you need to have a Firecrawl API key. 
@@ -21,11 +28,11 @@ class FirecrawlScrapeWebsiteTool(BaseTool): Default configuration options: formats (list[str]): Content formats to return. Default: ["markdown"] - only_main_content (bool): Only return main content. Default: True - include_tags (list[str]): Tags to include. Default: [] - exclude_tags (list[str]): Tags to exclude. Default: [] + onlyMainContent (bool): Only return main content. Default: True + includeTags (list[str]): Tags to include. Default: [] + excludeTags (list[str]): Tags to exclude. Default: [] headers (dict): Headers to include. Default: {} - wait_for (int): Time to wait for page to load in ms. Default: 0 + waitFor (int): Time to wait for page to load in ms. Default: 0 json_options (dict): Options for JSON extraction. Default: None """ @@ -39,11 +46,11 @@ class FirecrawlScrapeWebsiteTool(BaseTool): config: Dict[str, Any] = Field( default_factory=lambda: { "formats": ["markdown"], - "only_main_content": True, - "include_tags": [], - "exclude_tags": [], + "onlyMainContent": True, + "includeTags": [], + "excludeTags": [], "headers": {}, - "wait_for": 0, + "waitFor": 0, } ) @@ -74,7 +81,10 @@ class FirecrawlScrapeWebsiteTool(BaseTool): self._firecrawl = FirecrawlApp(api_key=api_key) def _run(self, url: str): - return self._firecrawl.scrape_url(url, **self.config) + if not self._firecrawl: + raise RuntimeError("FirecrawlApp not properly initialized") + + return self._firecrawl.scrape_url(url, params=self.config) try: diff --git a/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py b/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py index 0fb091b68..1cad4819a 100644 --- a/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py +++ b/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py @@ -98,7 +98,7 @@ class FirecrawlSearchTool(BaseTool): return self._firecrawl.search( query=query, - **self.config, + params=self.config, )