From 1eb5d50a5572e82837387b0d3f7cfdbb10c6c421 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Thu, 12 Dec 2024 16:00:24 -0500 Subject: [PATCH 1/5] Fix url and api_key args on crawler tool --- .../firecrawl_crawl_website_tool.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py index c23ff2100..d753cdd6f 100644 --- a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py +++ b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py @@ -1,7 +1,7 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, Type - from crewai.tools import BaseTool from pydantic import BaseModel, ConfigDict, Field +import os # Type checking import if TYPE_CHECKING: @@ -27,6 +27,7 @@ class FirecrawlCrawlWebsiteTool(BaseTool): args_schema: Type[BaseModel] = FirecrawlCrawlWebsiteToolSchema api_key: Optional[str] = None firecrawl: Optional["FirecrawlApp"] = None + url: Optional[str] = None def __init__(self, api_key: Optional[str] = None, **kwargs): super().__init__(**kwargs) @@ -37,7 +38,11 @@ class FirecrawlCrawlWebsiteTool(BaseTool): "`firecrawl` package not found, please run `pip install firecrawl-py`" ) - self.firecrawl = FirecrawlApp(api_key=api_key) + client_api_key = api_key or os.getenv("FIRECRAWL_API_KEY") + if not client_api_key: + raise ValueError("FIRECRAWL_API_KEY is not set") + + self.firecrawl = FirecrawlApp(api_key=client_api_key) def _run( self, @@ -45,13 +50,17 @@ class FirecrawlCrawlWebsiteTool(BaseTool): crawler_options: Optional[Dict[str, Any]] = None, page_options: Optional[Dict[str, Any]] = None, ): + # Unless url has been previously set via constructor by the user, + # use the url argument provided by the agent + base_url = self.url or url + if crawler_options is None: crawler_options = {} if page_options is 
None: page_options = {} options = {"crawlerOptions": crawler_options, "pageOptions": page_options} - return self.firecrawl.crawl_url(url, options) + return self.firecrawl.crawl_url(base_url, options) try: From 2cb33b18e5bab2a6948ad0cb8bba81c27f27bed1 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Fri, 13 Dec 2024 11:18:59 -0500 Subject: [PATCH 2/5] Remove outdated params --- .../firecrawl_crawl_website_tool.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py index d753cdd6f..f75685a49 100644 --- a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py +++ b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py @@ -10,13 +10,6 @@ if TYPE_CHECKING: class FirecrawlCrawlWebsiteToolSchema(BaseModel): url: str = Field(description="Website URL") - crawler_options: Optional[Dict[str, Any]] = Field( - default=None, description="Options for crawling" - ) - page_options: Optional[Dict[str, Any]] = Field( - default=None, description="Options for page" - ) - class FirecrawlCrawlWebsiteTool(BaseTool): model_config = ConfigDict( From 3a095183c56aff4a7f8e7d9a1324f36f4fa52590 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Fri, 13 Dec 2024 11:20:08 -0500 Subject: [PATCH 3/5] Use proper options and accept custom FirecrawlApp --- .../firecrawl_crawl_website_tool.py | 42 ++++++++++--------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py index f75685a49..07fef7730 100644 --- a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py +++ b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py @@ -18,9 +18,12 @@ 
class FirecrawlCrawlWebsiteTool(BaseTool): name: str = "Firecrawl web crawl tool" description: str = "Crawl webpages using Firecrawl and return the contents" args_schema: Type[BaseModel] = FirecrawlCrawlWebsiteToolSchema + firecrawl_app: Optional["FirecrawlApp"] = None api_key: Optional[str] = None - firecrawl: Optional["FirecrawlApp"] = None url: Optional[str] = None + params: Optional[Dict[str, Any]] = None + poll_interval: Optional[int] = 2 + idempotency_key: Optional[str] = None def __init__(self, api_key: Optional[str] = None, **kwargs): super().__init__(**kwargs) @@ -31,29 +34,28 @@ class FirecrawlCrawlWebsiteTool(BaseTool): "`firecrawl` package not found, please run `pip install firecrawl-py`" ) - client_api_key = api_key or os.getenv("FIRECRAWL_API_KEY") - if not client_api_key: - raise ValueError("FIRECRAWL_API_KEY is not set") + # Allows passing a previously created FirecrawlApp instance + # or builds a new one with the provided API key + if not self.firecrawl_app: + client_api_key = api_key or os.getenv("FIRECRAWL_API_KEY") + if not client_api_key: + raise ValueError( + "FIRECRAWL_API_KEY is not set. Please provide it either via the constructor " + "with the `api_key` argument or by setting the FIRECRAWL_API_KEY environment variable." + ) + self.firecrawl_app = FirecrawlApp(api_key=client_api_key) - self.firecrawl = FirecrawlApp(api_key=client_api_key) - - def _run( - self, - url: str, - crawler_options: Optional[Dict[str, Any]] = None, - page_options: Optional[Dict[str, Any]] = None, - ): + def _run(self, url: str): # Unless url has been previously set via constructor by the user, - # use the url argument provided by the agent + # use the url argument provided by the agent at runtime. 
base_url = self.url or url - if crawler_options is None: - crawler_options = {} - if page_options is None: - page_options = {} - - options = {"crawlerOptions": crawler_options, "pageOptions": page_options} - return self.firecrawl.crawl_url(base_url, options) + return self.firecrawl_app.crawl_url( + base_url, + params=self.params, + poll_interval=self.poll_interval, + idempotency_key=self.idempotency_key + ) try: From 164442223e153bffefcc794f22c71ece86eb095a Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Fri, 13 Dec 2024 11:23:53 -0500 Subject: [PATCH 4/5] Organize imports --- .../firecrawl_crawl_website_tool.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py index 07fef7730..1de7602ec 100644 --- a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py +++ b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py @@ -1,7 +1,9 @@ -from typing import TYPE_CHECKING, Any, Dict, Optional, Type -from crewai.tools import BaseTool -from pydantic import BaseModel, ConfigDict, Field import os +from typing import TYPE_CHECKING, Any, Dict, Optional, Type + +from pydantic import BaseModel, ConfigDict, Field + +from crewai.tools import BaseTool # Type checking import if TYPE_CHECKING: From 668e87d5e13ea45f8b388dcf2a9a8187048e381c Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Fri, 13 Dec 2024 11:26:46 -0500 Subject: [PATCH 5/5] Add constructor comments --- .../firecrawl_crawl_website_tool.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py index 1de7602ec..edada38dd 100644 --- 
a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py +++ b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py @@ -28,6 +28,17 @@ class FirecrawlCrawlWebsiteTool(BaseTool): idempotency_key: Optional[str] = None def __init__(self, api_key: Optional[str] = None, **kwargs): + """Initialize FirecrawlCrawlWebsiteTool. + + Args: + api_key (Optional[str]): Firecrawl API key. If not provided, will check FIRECRAWL_API_KEY env var. + url (Optional[str]): Base URL to crawl. When set, it takes precedence over the url passed to the _run method. + firecrawl_app (Optional[FirecrawlApp]): Previously created FirecrawlApp instance. + params (Optional[Dict[str, Any]]): Additional parameters to pass to the FirecrawlApp. + poll_interval (Optional[int]): Poll interval for the FirecrawlApp. + idempotency_key (Optional[str]): Idempotency key for the FirecrawlApp. + **kwargs: Additional arguments passed to BaseTool. + """ super().__init__(**kwargs) try: from firecrawl import FirecrawlApp # type: ignore