diff --git a/src/crewai_tools/__init__.py b/src/crewai_tools/__init__.py
index 7831b957d..9d9796165 100644
--- a/src/crewai_tools/__init__.py
+++ b/src/crewai_tools/__init__.py
@@ -61,6 +61,7 @@ from .tools import (
     SerpApiGoogleSearchTool,
     SerpApiGoogleShoppingTool,
     SerperDevTool,
+    SerperScrapeWebsiteTool,
     SerplyJobSearchTool,
     SerplyNewsSearchTool,
     SerplyScholarSearchTool,
diff --git a/src/crewai_tools/tools/__init__.py b/src/crewai_tools/tools/__init__.py
index d4b54c5ff..091fac62b 100644
--- a/src/crewai_tools/tools/__init__.py
+++ b/src/crewai_tools/tools/__init__.py
@@ -74,6 +74,7 @@ from .selenium_scraping_tool.selenium_scraping_tool import SeleniumScrapingTool
 from .serpapi_tool.serpapi_google_search_tool import SerpApiGoogleSearchTool
 from .serpapi_tool.serpapi_google_shopping_tool import SerpApiGoogleShoppingTool
 from .serper_dev_tool.serper_dev_tool import SerperDevTool
+from .serper_scrape_website_tool.serper_scrape_website_tool import SerperScrapeWebsiteTool
 from .serply_api_tool.serply_job_search_tool import SerplyJobSearchTool
 from .serply_api_tool.serply_news_search_tool import SerplyNewsSearchTool
 from .serply_api_tool.serply_scholar_search_tool import SerplyScholarSearchTool
diff --git a/src/crewai_tools/tools/serper_scrape_website_tool/serper_scrape_website_tool.py b/src/crewai_tools/tools/serper_scrape_website_tool/serper_scrape_website_tool.py
new file mode 100644
index 000000000..cefb431f4
--- /dev/null
+++ b/src/crewai_tools/tools/serper_scrape_website_tool/serper_scrape_website_tool.py
@@ -0,0 +1,82 @@
+from crewai.tools import BaseTool, EnvVar
+from typing import Type, List
+from pydantic import BaseModel, Field
+import requests
+import json
+import os
+
+
+class SerperScrapeWebsiteInput(BaseModel):
+    """Input schema for SerperScrapeWebsite."""
+    url: str = Field(..., description="The URL of the website to scrape")
+    include_markdown: bool = Field(
+        default=True,
+        description="Whether to include markdown formatting in the scraped content"
+    )
+
+
+class SerperScrapeWebsiteTool(BaseTool):
+    name: str = "serper_scrape_website"
+    description: str = (
+        "Scrapes website content using Serper's scraping API. "
+        "This tool can extract clean, readable content from any website URL, "
+        "optionally including markdown formatting for better structure."
+    )
+    args_schema: Type[BaseModel] = SerperScrapeWebsiteInput
+    env_vars: List[EnvVar] = [
+        EnvVar(name="SERPER_API_KEY", description="API key for Serper", required=True),
+    ]
+
+    def _run(self, url: str, include_markdown: bool = True) -> str:
+        """
+        Scrape website content using the Serper API.
+
+        Args:
+            url: The URL to scrape
+            include_markdown: Whether to include markdown formatting
+
+        Returns:
+            Scraped website content as a string
+        """
+        try:
+            # Serper API endpoint
+            api_url = "https://scrape.serper.dev"
+
+            # Get the API key from the environment and fail fast if it is missing
+            api_key = os.getenv('SERPER_API_KEY')
+            if not api_key:
+                return "Error: SERPER_API_KEY environment variable is not set"
+
+            # Prepare the payload
+            payload = json.dumps({
+                "url": url,
+                "includeMarkdown": include_markdown
+            })
+
+            # Set headers
+            headers = {
+                'X-API-KEY': api_key,
+                'Content-Type': 'application/json'
+            }
+
+            # Make the API request with a timeout so a stalled request cannot hang the agent
+            response = requests.post(api_url, headers=headers, data=payload, timeout=30)
+
+            # Check if the request was successful
+            if response.status_code == 200:
+                result = response.json()
+
+                # Extract the scraped content
+                if 'text' in result:
+                    return result['text']
+                else:
+                    return f"Successfully scraped {url}, but no text content found in response: {response.text}"
+            else:
+                return f"Error scraping {url}: HTTP {response.status_code} - {response.text}"
+
+        except requests.exceptions.RequestException as e:
+            return f"Network error while scraping {url}: {str(e)}"
+        except json.JSONDecodeError as e:
+            return f"Error parsing JSON response while scraping {url}: {str(e)}"
+        except Exception as e:
+            return f"Unexpected error while scraping {url}: {str(e)}"
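
For reviewers, a minimal usage sketch of the new tool. This is not part of the diff; it assumes `SERPER_API_KEY` is exported in the environment and that `BaseTool.run` validates keyword arguments against the `args_schema` and dispatches to `_run`, as with the other crewai tools. The URL is a placeholder.

```python
import os

from crewai_tools import SerperScrapeWebsiteTool

# Assumes the key is already exported, e.g.:
#   export SERPER_API_KEY="your-key-here"
assert os.getenv("SERPER_API_KEY"), "SERPER_API_KEY must be set"

tool = SerperScrapeWebsiteTool()

# run() validates against SerperScrapeWebsiteInput and calls _run;
# https://example.com stands in for a real target URL.
content = tool.run(url="https://example.com", include_markdown=True)
print(content[:500])
```

Note that `_run` returns error strings rather than raising, so failures (missing key, HTTP errors, timeouts) come back as readable text the agent can act on instead of crashing the crew.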