mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-11 00:58:30 +00:00
refactor: update Firecrawl tools to improve configuration and error handling (#351)
- Added TYPE_CHECKING imports for FirecrawlApp to enhance type safety.
- Updated configuration keys in FirecrawlCrawlWebsiteTool and FirecrawlScrapeWebsiteTool to camelCase for consistency.
- Introduced error handling in the _run methods of both tools to ensure FirecrawlApp is properly initialized before usage.
- Adjusted parameters passed to crawl_url and scrape_url methods to use 'params' instead of unpacking the config dictionary directly.
This commit is contained in:
@@ -1,12 +1,17 @@
|
|||||||
from typing import Any, Optional, Type, List
|
from typing import Any, Optional, Type, List, TYPE_CHECKING
|
||||||
|
|
||||||
from crewai.tools import BaseTool
|
from crewai.tools import BaseTool
|
||||||
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
|
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from firecrawl import FirecrawlApp
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from firecrawl import FirecrawlApp, ScrapeOptions
|
from firecrawl import FirecrawlApp
|
||||||
|
|
||||||
|
FIRECRAWL_AVAILABLE = True
|
||||||
except ImportError:
|
except ImportError:
|
||||||
FirecrawlApp = Any
|
FIRECRAWL_AVAILABLE = False
|
||||||
|
|
||||||
|
|
||||||
class FirecrawlCrawlWebsiteToolSchema(BaseModel):
|
class FirecrawlCrawlWebsiteToolSchema(BaseModel):
|
||||||
@@ -42,16 +47,16 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
|
|||||||
api_key: Optional[str] = None
|
api_key: Optional[str] = None
|
||||||
config: Optional[dict[str, Any]] = Field(
|
config: Optional[dict[str, Any]] = Field(
|
||||||
default_factory=lambda: {
|
default_factory=lambda: {
|
||||||
"max_depth": 2,
|
"maxDepth": 2,
|
||||||
"ignore_sitemap": True,
|
"ignoreSitemap": True,
|
||||||
"limit": 100,
|
"limit": 10,
|
||||||
"allow_backward_links": False,
|
"allowBackwardLinks": False,
|
||||||
"allow_external_links": False,
|
"allowExternalLinks": False,
|
||||||
"scrape_options": ScrapeOptions(
|
"scrapeOptions": {
|
||||||
formats=["markdown", "screenshot", "links"],
|
"formats": ["markdown", "screenshot", "links"],
|
||||||
only_main_content=True,
|
"onlyMainContent": True,
|
||||||
timeout=30000,
|
"timeout": 10000,
|
||||||
),
|
},
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
_firecrawl: Optional["FirecrawlApp"] = PrivateAttr(None)
|
_firecrawl: Optional["FirecrawlApp"] = PrivateAttr(None)
|
||||||
@@ -88,7 +93,10 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def _run(self, url: str):
|
def _run(self, url: str):
|
||||||
return self._firecrawl.crawl_url(url, **self.config)
|
if not self._firecrawl:
|
||||||
|
raise RuntimeError("FirecrawlApp not properly initialized")
|
||||||
|
|
||||||
|
return self._firecrawl.crawl_url(url, poll_interval=2, params=self.config)
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -1,16 +1,23 @@
|
|||||||
from typing import Any, Optional, Type, Dict, List
|
from typing import Any, Optional, Type, Dict, List, TYPE_CHECKING
|
||||||
|
|
||||||
from crewai.tools import BaseTool
|
from crewai.tools import BaseTool
|
||||||
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
|
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from firecrawl import FirecrawlApp
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from firecrawl import FirecrawlApp
|
from firecrawl import FirecrawlApp
|
||||||
|
|
||||||
|
FIRECRAWL_AVAILABLE = True
|
||||||
except ImportError:
|
except ImportError:
|
||||||
FirecrawlApp = Any
|
FIRECRAWL_AVAILABLE = False
|
||||||
|
|
||||||
|
|
||||||
class FirecrawlScrapeWebsiteToolSchema(BaseModel):
|
class FirecrawlScrapeWebsiteToolSchema(BaseModel):
|
||||||
url: str = Field(description="Website URL")
|
url: str = Field(description="Website URL")
|
||||||
|
|
||||||
|
|
||||||
class FirecrawlScrapeWebsiteTool(BaseTool):
|
class FirecrawlScrapeWebsiteTool(BaseTool):
|
||||||
"""
|
"""
|
||||||
Tool for scraping webpages using Firecrawl. To run this tool, you need to have a Firecrawl API key.
|
Tool for scraping webpages using Firecrawl. To run this tool, you need to have a Firecrawl API key.
|
||||||
@@ -21,11 +28,11 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
|
|||||||
|
|
||||||
Default configuration options:
|
Default configuration options:
|
||||||
formats (list[str]): Content formats to return. Default: ["markdown"]
|
formats (list[str]): Content formats to return. Default: ["markdown"]
|
||||||
only_main_content (bool): Only return main content. Default: True
|
onlyMainContent (bool): Only return main content. Default: True
|
||||||
include_tags (list[str]): Tags to include. Default: []
|
includeTags (list[str]): Tags to include. Default: []
|
||||||
exclude_tags (list[str]): Tags to exclude. Default: []
|
excludeTags (list[str]): Tags to exclude. Default: []
|
||||||
headers (dict): Headers to include. Default: {}
|
headers (dict): Headers to include. Default: {}
|
||||||
wait_for (int): Time to wait for page to load in ms. Default: 0
|
waitFor (int): Time to wait for page to load in ms. Default: 0
|
||||||
json_options (dict): Options for JSON extraction. Default: None
|
json_options (dict): Options for JSON extraction. Default: None
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -39,11 +46,11 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
|
|||||||
config: Dict[str, Any] = Field(
|
config: Dict[str, Any] = Field(
|
||||||
default_factory=lambda: {
|
default_factory=lambda: {
|
||||||
"formats": ["markdown"],
|
"formats": ["markdown"],
|
||||||
"only_main_content": True,
|
"onlyMainContent": True,
|
||||||
"include_tags": [],
|
"includeTags": [],
|
||||||
"exclude_tags": [],
|
"excludeTags": [],
|
||||||
"headers": {},
|
"headers": {},
|
||||||
"wait_for": 0,
|
"waitFor": 0,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -74,7 +81,10 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
|
|||||||
self._firecrawl = FirecrawlApp(api_key=api_key)
|
self._firecrawl = FirecrawlApp(api_key=api_key)
|
||||||
|
|
||||||
def _run(self, url: str):
|
def _run(self, url: str):
|
||||||
return self._firecrawl.scrape_url(url, **self.config)
|
if not self._firecrawl:
|
||||||
|
raise RuntimeError("FirecrawlApp not properly initialized")
|
||||||
|
|
||||||
|
return self._firecrawl.scrape_url(url, params=self.config)
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -98,7 +98,7 @@ class FirecrawlSearchTool(BaseTool):
|
|||||||
|
|
||||||
return self._firecrawl.search(
|
return self._firecrawl.search(
|
||||||
query=query,
|
query=query,
|
||||||
**self.config,
|
params=self.config,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user