mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-11 09:08:31 +00:00
Merge pull request #145 from caike/fix-firecrawler-crawl-args
Fix url and api_key args on crawler tool
This commit is contained in:
@@ -1,8 +1,10 @@
|
||||
import os
|
||||
from typing import TYPE_CHECKING, Any, Dict, Optional, Type
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
|
||||
# Type checking import
|
||||
if TYPE_CHECKING:
|
||||
from firecrawl import FirecrawlApp
|
||||
@@ -10,13 +12,6 @@ if TYPE_CHECKING:
|
||||
|
||||
class FirecrawlCrawlWebsiteToolSchema(BaseModel):
|
||||
url: str = Field(description="Website URL")
|
||||
crawler_options: Optional[Dict[str, Any]] = Field(
|
||||
default=None, description="Options for crawling"
|
||||
)
|
||||
page_options: Optional[Dict[str, Any]] = Field(
|
||||
default=None, description="Options for page"
|
||||
)
|
||||
|
||||
|
||||
class FirecrawlCrawlWebsiteTool(BaseTool):
|
||||
model_config = ConfigDict(
|
||||
@@ -25,10 +20,25 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
|
||||
name: str = "Firecrawl web crawl tool"
|
||||
description: str = "Crawl webpages using Firecrawl and return the contents"
|
||||
args_schema: Type[BaseModel] = FirecrawlCrawlWebsiteToolSchema
|
||||
firecrawl_app: Optional["FirecrawlApp"] = None
|
||||
api_key: Optional[str] = None
|
||||
firecrawl: Optional["FirecrawlApp"] = None
|
||||
url: Optional[str] = None
|
||||
params: Optional[Dict[str, Any]] = None
|
||||
poll_interval: Optional[int] = 2
|
||||
idempotency_key: Optional[str] = None
|
||||
|
||||
def __init__(self, api_key: Optional[str] = None, **kwargs):
|
||||
"""Initialize FirecrawlCrawlWebsiteTool.
|
||||
|
||||
Args:
|
||||
api_key (Optional[str]): Firecrawl API key. If not provided, will check FIRECRAWL_API_KEY env var.
|
||||
url (Optional[str]): Base URL to crawl. When set, it takes precedence over the url passed to the _run method.
|
||||
firecrawl_app (Optional[FirecrawlApp]): Previously created FirecrawlApp instance.
|
||||
params (Optional[Dict[str, Any]]): Additional parameters passed to FirecrawlApp.crawl_url on each run.
|
||||
poll_interval (Optional[int]): Poll interval passed to FirecrawlApp.crawl_url. Defaults to 2.
|
||||
idempotency_key (Optional[str]): Idempotency key passed to FirecrawlApp.crawl_url.
|
||||
**kwargs: Additional arguments passed to BaseTool.
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
try:
|
||||
from firecrawl import FirecrawlApp # type: ignore
|
||||
@@ -37,21 +47,28 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
|
||||
"`firecrawl` package not found, please run `pip install firecrawl-py`"
|
||||
)
|
||||
|
||||
self.firecrawl = FirecrawlApp(api_key=api_key)
|
||||
# Allows passing a previously created FirecrawlApp instance
|
||||
# or builds a new one with the provided API key
|
||||
if not self.firecrawl_app:
|
||||
client_api_key = api_key or os.getenv("FIRECRAWL_API_KEY")
|
||||
if not client_api_key:
|
||||
raise ValueError(
|
||||
"FIRECRAWL_API_KEY is not set. Please provide it either via the constructor "
|
||||
"with the `api_key` argument or by setting the FIRECRAWL_API_KEY environment variable."
|
||||
)
|
||||
self.firecrawl_app = FirecrawlApp(api_key=client_api_key)
|
||||
|
||||
def _run(
|
||||
self,
|
||||
url: str,
|
||||
crawler_options: Optional[Dict[str, Any]] = None,
|
||||
page_options: Optional[Dict[str, Any]] = None,
|
||||
):
|
||||
if crawler_options is None:
|
||||
crawler_options = {}
|
||||
if page_options is None:
|
||||
page_options = {}
|
||||
def _run(self, url: str):
|
||||
# Unless url has been previously set via constructor by the user,
|
||||
# use the url argument provided by the agent at runtime.
|
||||
base_url = self.url or url
|
||||
|
||||
options = {"crawlerOptions": crawler_options, "pageOptions": page_options}
|
||||
return self.firecrawl.crawl_url(url, options)
|
||||
return self.firecrawl_app.crawl_url(
|
||||
base_url,
|
||||
params=self.params,
|
||||
poll_interval=self.poll_interval,
|
||||
idempotency_key=self.idempotency_key
|
||||
)
|
||||
|
||||
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user