Move off v1

This commit is contained in:
Brandon Hancock
2024-09-03 15:57:29 -04:00
parent d19bba72b0
commit 35fe222ca1
39 changed files with 752 additions and 550 deletions

View File

@@ -1,12 +1,23 @@
from typing import Optional, Any, Type, Dict
from pydantic.v1 import BaseModel, Field
from typing import Any, Dict, Optional, Type
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
# Pydantic model declaring four fields: a required `url` string plus optional
# `page_options`, `extractor_options` and `timeout`, each defaulting to None.
# NOTE(review): this text is a rendered diff, so the three optional fields
# appear twice below -- presumably the old single-line form first, then the
# reformatted multi-line form. Both forms carry the same type, default and
# description.
class FirecrawlScrapeWebsiteToolSchema(BaseModel):
url: str = Field(description="Website URL")
# Single-line definitions (presumably the pre-commit side of the diff):
page_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for page scraping")
extractor_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for data extraction")
timeout: Optional[int] = Field(default=None, description="Timeout in milliseconds for the scraping operation. The default value is 30000.")
# Wrapped definitions of the same three fields (presumably the post-commit side):
page_options: Optional[Dict[str, Any]] = Field(
default=None, description="Options for page scraping"
)
extractor_options: Optional[Dict[str, Any]] = Field(
default=None, description="Options for data extraction"
)
# The field default is None; the 30000 ms figure exists only in the description
# text and is not applied anywhere in the lines visible here.
timeout: Optional[int] = Field(
default=None,
description="Timeout in milliseconds for the scraping operation. The default value is 30000.",
)
class FirecrawlScrapeWebsiteTool(BaseTool):
name: str = "Firecrawl web scrape tool"
@@ -18,15 +29,21 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
# Constructor: forwards **kwargs to the parent initializer, imports
# `FirecrawlApp` inside the method so that a missing `firecrawl` package is
# reported as an ImportError carrying an install hint, then stores a client
# built with `api_key` on `self.firecrawl`.
# NOTE(review): this text is a rendered diff -- the import line and the
# `raise ImportError(...)` statement each appear twice (old form, then new
# form); presumably they differ only in whitespace that the rendering stripped.
def __init__(self, api_key: Optional[str] = None, **kwargs):
super().__init__(**kwargs)
try:
# Deferred import: the dependency is only needed once the tool is instantiated.
from firecrawl import FirecrawlApp # type: ignore
from firecrawl import FirecrawlApp # type: ignore
except ImportError:
# Re-raise with a message that names the pip package to install.
raise ImportError(
"`firecrawl` package not found, please run `pip install firecrawl-py`"
)
raise ImportError(
"`firecrawl` package not found, please run `pip install firecrawl-py`"
)
# `api_key` may be None; how FirecrawlApp resolves a missing key is not shown here.
self.firecrawl = FirecrawlApp(api_key=api_key)
def _run(self, url: str, page_options: Optional[Dict[str, Any]] = None, extractor_options: Optional[Dict[str, Any]] = None, timeout: Optional[int] = None):
def _run(
self,
url: str,
page_options: Optional[Dict[str, Any]] = None,
extractor_options: Optional[Dict[str, Any]] = None,
timeout: Optional[int] = None,
):
if page_options is None:
page_options = {}
if extractor_options is None:
@@ -37,6 +54,6 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
options = {
"pageOptions": page_options,
"extractorOptions": extractor_options,
"timeout": timeout
"timeout": timeout,
}
return self.firecrawl.scrape_url(url, options)
return self.firecrawl.scrape_url(url, options)