Fix url and api_key args on crawler tool

This commit is contained in:
Carlos Souza
2024-12-12 16:00:24 -05:00
parent a49be2fc52
commit 1eb5d50a55

View File

@@ -1,7 +1,7 @@
from typing import TYPE_CHECKING, Any, Dict, Optional, Type from typing import TYPE_CHECKING, Any, Dict, Optional, Type
from crewai.tools import BaseTool from crewai.tools import BaseTool
from pydantic import BaseModel, ConfigDict, Field from pydantic import BaseModel, ConfigDict, Field
import os
# Type checking import # Type checking import
if TYPE_CHECKING: if TYPE_CHECKING:
@@ -27,6 +27,7 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
args_schema: Type[BaseModel] = FirecrawlCrawlWebsiteToolSchema args_schema: Type[BaseModel] = FirecrawlCrawlWebsiteToolSchema
api_key: Optional[str] = None api_key: Optional[str] = None
firecrawl: Optional["FirecrawlApp"] = None firecrawl: Optional["FirecrawlApp"] = None
url: Optional[str] = None
def __init__(self, api_key: Optional[str] = None, **kwargs): def __init__(self, api_key: Optional[str] = None, **kwargs):
super().__init__(**kwargs) super().__init__(**kwargs)
@@ -37,7 +38,11 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
"`firecrawl` package not found, please run `pip install firecrawl-py`" "`firecrawl` package not found, please run `pip install firecrawl-py`"
) )
self.firecrawl = FirecrawlApp(api_key=api_key) client_api_key = api_key or os.getenv("FIRECRAWL_API_KEY")
if not client_api_key:
raise ValueError("FIRECRAWL_API_KEY is not set")
self.firecrawl = FirecrawlApp(api_key=client_api_key)
def _run( def _run(
self, self,
@@ -45,13 +50,17 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
crawler_options: Optional[Dict[str, Any]] = None, crawler_options: Optional[Dict[str, Any]] = None,
page_options: Optional[Dict[str, Any]] = None, page_options: Optional[Dict[str, Any]] = None,
): ):
# Unless url has been previously set via constructor by the user,
# use the url argument provided by the agent
base_url = self.url or url
if crawler_options is None: if crawler_options is None:
crawler_options = {} crawler_options = {}
if page_options is None: if page_options is None:
page_options = {} page_options = {}
options = {"crawlerOptions": crawler_options, "pageOptions": page_options} options = {"crawlerOptions": crawler_options, "pageOptions": page_options}
return self.firecrawl.crawl_url(url, options) return self.firecrawl.crawl_url(base_url, options)
try: try: