feat: add SerperScrapeWebsiteTool for extracting clean content from URLs (#392)

* feat: add SerperScrapeWebsiteTool for extracting clean content from URLs

* feat: add required SERPER_API_KEY env var validation to SerperScrapeWebsiteTool
Author: Mike Plachta
Date: 2025-07-23 10:22:47 -07:00
Committed by: GitHub
Parent: c3e87fc31f
Commit: 104485d18b

3 changed files with 82 additions and 0 deletions

crewai_tools/__init__.py

@@ -61,6 +61,7 @@ from .tools import (
SerpApiGoogleSearchTool,
SerpApiGoogleShoppingTool,
SerperDevTool,
SerperScrapeWebsiteTool,
SerplyJobSearchTool,
SerplyNewsSearchTool,
SerplyScholarSearchTool,

crewai_tools/tools/__init__.py

@@ -74,6 +74,7 @@ from .selenium_scraping_tool.selenium_scraping_tool import SeleniumScrapingTool
from .serpapi_tool.serpapi_google_search_tool import SerpApiGoogleSearchTool
from .serpapi_tool.serpapi_google_shopping_tool import SerpApiGoogleShoppingTool
from .serper_dev_tool.serper_dev_tool import SerperDevTool
from .serper_scrape_website_tool.serper_scrape_website_tool import SerperScrapeWebsiteTool
from .serply_api_tool.serply_job_search_tool import SerplyJobSearchTool
from .serply_api_tool.serply_news_search_tool import SerplyNewsSearchTool
from .serply_api_tool.serply_scholar_search_tool import SerplyScholarSearchTool

crewai_tools/tools/serper_scrape_website_tool/serper_scrape_website_tool.py

@@ -0,0 +1,80 @@
import json
import os
from typing import List, Type

import requests
from crewai.tools import BaseTool, EnvVar
from pydantic import BaseModel, Field


class SerperScrapeWebsiteInput(BaseModel):
    """Input schema for SerperScrapeWebsiteTool."""

    url: str = Field(..., description="The URL of the website to scrape")
    include_markdown: bool = Field(
        default=True,
        description="Whether to include markdown formatting in the scraped content",
    )


class SerperScrapeWebsiteTool(BaseTool):
    name: str = "serper_scrape_website"
    description: str = (
        "Scrapes website content using Serper's scraping API. "
        "This tool can extract clean, readable content from any website URL, "
        "optionally including markdown formatting for better structure."
    )
    args_schema: Type[BaseModel] = SerperScrapeWebsiteInput
    env_vars: List[EnvVar] = [
        EnvVar(name="SERPER_API_KEY", description="API key for Serper", required=True),
    ]
    def _run(self, url: str, include_markdown: bool = True) -> str:
        """Scrape website content using the Serper API.

        Args:
            url: The URL to scrape.
            include_markdown: Whether to include markdown formatting.

        Returns:
            Scraped website content as a string.
        """
        try:
            # Serper's dedicated scraping endpoint
            api_url = "https://scrape.serper.dev"

            # Read the API key from the environment rather than hard-coding it
            api_key = os.getenv("SERPER_API_KEY")
            if not api_key:
                return "Error: SERPER_API_KEY environment variable is not set."

            # Prepare the request payload
            payload = json.dumps({
                "url": url,
                "includeMarkdown": include_markdown,
            })

            # Set headers
            headers = {
                "X-API-KEY": api_key,
                "Content-Type": "application/json",
            }

            # Make the API request; the timeout keeps a stalled request from hanging
            response = requests.post(api_url, headers=headers, data=payload, timeout=30)

            # Check whether the request was successful
            if response.status_code == 200:
                result = response.json()
                # The scraped page text is returned under the "text" key
                if "text" in result:
                    return result["text"]
                else:
                    return f"Successfully scraped {url}, but no text content found in response: {response.text}"
            else:
                return f"Error scraping {url}: HTTP {response.status_code} - {response.text}"
        except requests.exceptions.RequestException as e:
            return f"Network error while scraping {url}: {str(e)}"
        except json.JSONDecodeError as e:
            return f"Error parsing JSON response while scraping {url}: {str(e)}"
        except Exception as e:
            return f"Unexpected error while scraping {url}: {str(e)}"