Mirror of https://github.com/crewAIInc/crewAI.git (synced 2026-01-09 08:08:32 +00:00)
Merge pull request #179 from crewAIInc/bugfix/improve-firecrawl-and-serper
improve serper and firecrawl
@@ -35,6 +35,8 @@ from .tools import (
     ScrapeWebsiteTool,
     ScrapflyScrapeWebsiteTool,
     SeleniumScrapingTool,
+    SerpApiGoogleSearchTool,
+    SerpApiGoogleShoppingTool,
     SerperDevTool,
     SerplyJobSearchTool,
     SerplyNewsSearchTool,
@@ -44,11 +46,9 @@ from .tools import (
     SpiderTool,
     TXTSearchTool,
     VisionTool,
+    WeaviateVectorSearchTool,
     WebsiteSearchTool,
     XMLSearchTool,
     YoutubeChannelSearchTool,
     YoutubeVideoSearchTool,
-    WeaviateVectorSearchTool,
-    SerpApiGoogleSearchTool,
-    SerpApiGoogleShoppingTool,
 )
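This hunk is pure housekeeping: the duplicate trailing imports are dropped and each tool is re-exported exactly once, in alphabetical order; the same reordering is applied to `tools/__init__.py` below. A quick sanity check of the public surface (a sketch; assumes `pip install crewai-tools`, and note that individual tools may still need optional extras such as `firecrawl-py` when actually run):

```python
# Sketch: confirm the deduplicated names still resolve from the package
# root after the reorder.
from crewai_tools import (
    ScrapflyScrapeWebsiteTool,
    SerpApiGoogleSearchTool,
    SerpApiGoogleShoppingTool,
    WeaviateVectorSearchTool,
)

for tool_cls in (
    ScrapflyScrapeWebsiteTool,
    SerpApiGoogleSearchTool,
    SerpApiGoogleShoppingTool,
    WeaviateVectorSearchTool,
):
    print(f"{tool_cls.__name__} -> {tool_cls.__module__}")
```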
@@ -26,19 +26,28 @@ from .mdx_seach_tool.mdx_search_tool import MDXSearchTool
 from .multion_tool.multion_tool import MultiOnTool
 from .mysql_search_tool.mysql_search_tool import MySQLSearchTool
 from .nl2sql.nl2sql_tool import NL2SQLTool
-from .patronus_eval_tool.eval_tool import PatronusEvalTool
+from .patronus_eval_tool import (
+    PatronusEvalTool,
+    PatronusLocalEvaluatorTool,
+    PatronusPredefinedCriteriaEvalTool,
+)
 from .pdf_search_tool.pdf_search_tool import PDFSearchTool
 from .pg_seach_tool.pg_search_tool import PGSearchTool
 from .rag.rag_tool import RagTool
 from .scrape_element_from_website.scrape_element_from_website import (
     ScrapeElementFromWebsiteTool,
 )
-from .scrapegraph_scrape_tool.scrapegraph_scrape_tool import ScrapegraphScrapeTool, ScrapegraphScrapeToolSchema
 from .scrape_website_tool.scrape_website_tool import ScrapeWebsiteTool
+from .scrapegraph_scrape_tool.scrapegraph_scrape_tool import (
+    ScrapegraphScrapeTool,
+    ScrapegraphScrapeToolSchema,
+)
 from .scrapfly_scrape_website_tool.scrapfly_scrape_website_tool import (
     ScrapflyScrapeWebsiteTool,
 )
 from .selenium_scraping_tool.selenium_scraping_tool import SeleniumScrapingTool
+from .serpapi_tool.serpapi_google_search_tool import SerpApiGoogleSearchTool
+from .serpapi_tool.serpapi_google_shopping_tool import SerpApiGoogleShoppingTool
 from .serper_dev_tool.serper_dev_tool import SerperDevTool
 from .serply_api_tool.serply_job_search_tool import SerplyJobSearchTool
 from .serply_api_tool.serply_news_search_tool import SerplyNewsSearchTool
@@ -48,12 +57,10 @@ from .serply_api_tool.serply_webpage_to_markdown_tool import SerplyWebpageToMark
 from .spider_tool.spider_tool import SpiderTool
 from .txt_search_tool.txt_search_tool import TXTSearchTool
 from .vision_tool.vision_tool import VisionTool
+from .weaviate_tool.vector_search import WeaviateVectorSearchTool
 from .website_search.website_search_tool import WebsiteSearchTool
 from .xml_search_tool.xml_search_tool import XMLSearchTool
 from .youtube_channel_search_tool.youtube_channel_search_tool import (
     YoutubeChannelSearchTool,
 )
 from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool
-from .weaviate_tool.vector_search import WeaviateVectorSearchTool
-from .serpapi_tool.serpapi_google_search_tool import SerpApiGoogleSearchTool
-from .serpapi_tool.serpapi_google_shopping_tool import SerpApiGoogleShoppingTool
@@ -1,9 +1,8 @@
 import os
 from typing import TYPE_CHECKING, Any, Dict, Optional, Type

-from pydantic import BaseModel, ConfigDict, Field
-
 from crewai.tools import BaseTool
+from pydantic import BaseModel, ConfigDict, Field

 # Type checking import
 if TYPE_CHECKING:
@@ -12,6 +11,14 @@ if TYPE_CHECKING:

 class FirecrawlCrawlWebsiteToolSchema(BaseModel):
     url: str = Field(description="Website URL")
+    crawler_options: Optional[Dict[str, Any]] = Field(
+        default=None, description="Options for crawling"
+    )
+    timeout: Optional[int] = Field(
+        default=30000,
+        description="Timeout in milliseconds for the crawling operation. The default value is 30000.",
+    )


 class FirecrawlCrawlWebsiteTool(BaseTool):
     model_config = ConfigDict(
@@ -20,25 +27,10 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
     name: str = "Firecrawl web crawl tool"
     description: str = "Crawl webpages using Firecrawl and return the contents"
     args_schema: Type[BaseModel] = FirecrawlCrawlWebsiteToolSchema
-    firecrawl_app: Optional["FirecrawlApp"] = None
     api_key: Optional[str] = None
-    url: Optional[str] = None
-    params: Optional[Dict[str, Any]] = None
-    poll_interval: Optional[int] = 2
-    idempotency_key: Optional[str] = None
+    firecrawl: Optional["FirecrawlApp"] = None

     def __init__(self, api_key: Optional[str] = None, **kwargs):
-        """Initialize FirecrawlCrawlWebsiteTool.
-
-        Args:
-            api_key (Optional[str]): Firecrawl API key. If not provided, will check FIRECRAWL_API_KEY env var.
-            url (Optional[str]): Base URL to crawl. Can be overridden by the _run method.
-            firecrawl_app (Optional[FirecrawlApp]): Previously created FirecrawlApp instance.
-            params (Optional[Dict[str, Any]]): Additional parameters to pass to the FirecrawlApp.
-            poll_interval (Optional[int]): Poll interval for the FirecrawlApp.
-            idempotency_key (Optional[str]): Idempotency key for the FirecrawlApp.
-            **kwargs: Additional arguments passed to BaseTool.
-        """
         super().__init__(**kwargs)
         try:
             from firecrawl import FirecrawlApp  # type: ignore
@@ -47,28 +39,29 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
                 "`firecrawl` package not found, please run `pip install firecrawl-py`"
             )

         # Allows passing a previously created FirecrawlApp instance
         # or builds a new one with the provided API key
-        if not self.firecrawl_app:
+        if not self.firecrawl:
             client_api_key = api_key or os.getenv("FIRECRAWL_API_KEY")
             if not client_api_key:
                 raise ValueError(
                     "FIRECRAWL_API_KEY is not set. Please provide it either via the constructor "
                     "with the `api_key` argument or by setting the FIRECRAWL_API_KEY environment variable."
                 )
-            self.firecrawl_app = FirecrawlApp(api_key=client_api_key)
+            self.firecrawl = FirecrawlApp(api_key=client_api_key)

-    def _run(self, url: str):
-        # Unless url has been previously set via constructor by the user,
-        # use the url argument provided by the agent at runtime.
-        base_url = self.url or url
-
-        return self.firecrawl_app.crawl_url(
-            base_url,
-            params=self.params,
-            poll_interval=self.poll_interval,
-            idempotency_key=self.idempotency_key
-        )
+    def _run(
+        self,
+        url: str,
+        crawler_options: Optional[Dict[str, Any]] = None,
+        timeout: Optional[int] = 30000,
+    ):
+        if crawler_options is None:
+            crawler_options = {}
+
+        options = {
+            "crawlerOptions": crawler_options,
+            "timeout": timeout,
+        }
+        return self.firecrawl.crawl_url(url, options)


 try:
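With this hunk the crawl tool stops carrying per-crawl state (`url`, `params`, `poll_interval`, `idempotency_key`) on the instance; the agent now supplies `url`, `crawler_options`, and `timeout` per call, and `_run` forwards them as a single options dict. A minimal usage sketch, assuming `firecrawl-py` is installed and `FIRECRAWL_API_KEY` is set (the `maxDepth` option is illustrative, not something this diff defines):

```python
from crewai_tools import FirecrawlCrawlWebsiteTool

# Per __init__ above, the key is taken from the explicit api_key argument
# first, then from the FIRECRAWL_API_KEY environment variable.
tool = FirecrawlCrawlWebsiteTool()

# run() validates kwargs against FirecrawlCrawlWebsiteToolSchema and
# _run() wraps them as {"crawlerOptions": ..., "timeout": ...}.
result = tool.run(
    url="https://firecrawl.dev",
    crawler_options={"maxDepth": 2},  # illustrative option; see Firecrawl docs
    timeout=30000,  # milliseconds (the schema default)
)
print(result)
```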
@@ -1,4 +1,4 @@
-from typing import TYPE_CHECKING, Any, Dict, Optional, Type
+from typing import TYPE_CHECKING, Optional, Type

 from crewai.tools import BaseTool
 from pydantic import BaseModel, ConfigDict, Field
@@ -10,14 +10,8 @@ if TYPE_CHECKING:

 class FirecrawlScrapeWebsiteToolSchema(BaseModel):
     url: str = Field(description="Website URL")
-    page_options: Optional[Dict[str, Any]] = Field(
-        default=None, description="Options for page scraping"
-    )
-    extractor_options: Optional[Dict[str, Any]] = Field(
-        default=None, description="Options for data extraction"
-    )
     timeout: Optional[int] = Field(
-        default=None,
+        default=30000,
         description="Timeout in milliseconds for the scraping operation. The default value is 30000.",
     )
@@ -46,20 +40,15 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
     def _run(
         self,
         url: str,
-        page_options: Optional[Dict[str, Any]] = None,
-        extractor_options: Optional[Dict[str, Any]] = None,
-        timeout: Optional[int] = None,
+        timeout: Optional[int] = 30000,
     ):
-        if page_options is None:
-            page_options = {}
-        if extractor_options is None:
-            extractor_options = {}
-        if timeout is None:
-            timeout = 30000
-
         options = {
-            "pageOptions": page_options,
-            "extractorOptions": extractor_options,
+            "formats": ["markdown"],
+            "onlyMainContent": True,
+            "includeTags": [],
+            "excludeTags": [],
+            "headers": {},
+            "waitFor": 0,
             "timeout": timeout,
         }
         return self.firecrawl.scrape_url(url, options)
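The scrape tool moves in the opposite direction: rather than exposing `pageOptions`/`extractorOptions` passthroughs, it now pins markdown output and main-content-only scraping, leaving `timeout` as the only tunable. A sketch under the same assumptions as the crawl example:

```python
from crewai_tools import FirecrawlScrapeWebsiteTool

tool = FirecrawlScrapeWebsiteTool()

# Only url and timeout remain in the schema; output format and content
# filtering are fixed by the options dict that _run() builds above.
markdown = tool.run(url="https://firecrawl.dev", timeout=30000)
print(markdown)
```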
@@ -10,11 +10,22 @@ if TYPE_CHECKING:

 class FirecrawlSearchToolSchema(BaseModel):
     query: str = Field(description="Search query")
-    page_options: Optional[Dict[str, Any]] = Field(
-        default=None, description="Options for result formatting"
-    )
-    search_options: Optional[Dict[str, Any]] = Field(
-        default=None, description="Options for searching"
-    )
+    limit: Optional[int] = Field(
+        default=5, description="Maximum number of results to return"
+    )
+    tbs: Optional[str] = Field(default=None, description="Time-based search parameter")
+    lang: Optional[str] = Field(
+        default="en", description="Language code for search results"
+    )
+    country: Optional[str] = Field(
+        default="us", description="Country code for search results"
+    )
+    location: Optional[str] = Field(
+        default=None, description="Location parameter for search results"
+    )
+    timeout: Optional[int] = Field(default=60000, description="Timeout in milliseconds")
+    scrape_options: Optional[Dict[str, Any]] = Field(
+        default=None, description="Options for scraping search results"
+    )
@@ -39,13 +50,25 @@ class FirecrawlSearchTool(BaseTool):
     def _run(
         self,
         query: str,
-        page_options: Optional[Dict[str, Any]] = None,
-        result_options: Optional[Dict[str, Any]] = None,
+        limit: Optional[int] = 5,
+        tbs: Optional[str] = None,
+        lang: Optional[str] = "en",
+        country: Optional[str] = "us",
+        location: Optional[str] = None,
+        timeout: Optional[int] = 60000,
+        scrape_options: Optional[Dict[str, Any]] = None,
     ):
-        if page_options is None:
-            page_options = {}
-        if result_options is None:
-            result_options = {}
-
-        options = {"pageOptions": page_options, "resultOptions": result_options}
-        return self.firecrawl.search(query, **options)
+        if scrape_options is None:
+            scrape_options = {}
+
+        options = {
+            "query": query,
+            "limit": limit,
+            "tbs": tbs,
+            "lang": lang,
+            "country": country,
+            "location": location,
+            "timeout": timeout,
+            "scrapeOptions": scrape_options,
+        }
+        return self.firecrawl.search(**options)
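The search schema now exposes Firecrawl's search parameters individually (result limit, time filter, locale, optional scraping of hits) instead of two opaque dicts, so each knob is validated and documented for the agent. Sketch:

```python
from crewai_tools import FirecrawlSearchTool

tool = FirecrawlSearchTool()

# Each kwarg maps one-to-one onto the options dict passed to
# self.firecrawl.search(**options) in _run() above.
results = tool.run(
    query="firecrawl web scraping",
    limit=5,
    lang="en",
    country="us",
    scrape_options={},  # empty: search only, do not scrape each hit
)
print(results)
```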
src/crewai_tools/tools/patronus_eval_tool/__init__.py (new file, +3)
@@ -0,0 +1,3 @@
+from .patronus_eval_tool import PatronusEvalTool
+from .patronus_local_evaluator_tool import PatronusLocalEvaluatorTool
+from .patronus_predefined_criteria_eval_tool import PatronusPredefinedCriteriaEvalTool
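This new `__init__.py` is what makes the grouped import added to `tools/__init__.py` earlier in the diff resolve:

```python
# Mirrors the grouped import added to tools/__init__.py above.
from crewai_tools.tools.patronus_eval_tool import (
    PatronusEvalTool,
    PatronusLocalEvaluatorTool,
    PatronusPredefinedCriteriaEvalTool,
)
```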
@@ -35,7 +35,7 @@ class SerperDevToolSchema(BaseModel):


 class SerperDevTool(BaseTool):
-    name: str = "Search the internet"
+    name: str = "Search the internet with Serper"
     description: str = (
         "A tool that can be used to search the internet with a search_query. "
         "Supports different search types: 'search' (default), 'news'"
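Renaming the tool from the generic "Search the internet" to "Search the internet with Serper" disambiguates it in agent prompts when several search tools are attached; call semantics are unchanged. A sketch, assuming `SERPER_API_KEY` is set:

```python
import os

from crewai_tools import SerperDevTool

assert os.getenv("SERPER_API_KEY"), "SerperDevTool needs SERPER_API_KEY"

tool = SerperDevTool()
print(tool.name)  # "Search the internet with Serper" after this change

results = tool.run(search_query="latest crewAI release")
print(results)
```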