mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-11 00:58:30 +00:00
Improved readme based on recommendations—added more advanced usage examples
This commit is contained in:
from crewai_tools import SpiderTool

spider_tool = SpiderTool(api_key='YOUR_API_KEY')

# Initialize the tool with the website URL, so the agent can only scrape the content of the specified website
spider_tool = SpiderTool(website_url='https://spider.cloud')

# Pass in custom parameters, see below for more details
spider_tool = SpiderTool(
    website_url='https://spider.cloud',
    custom_params={"depth": 2, "anti_bot": True, "proxy_enabled": True}
)
# Advanced usage using css query selector to extract content
css_extraction_map = {
    "/": [  # pass in path (main index in this case)
        {
            "name": "headers",  # give it a name for this element
            "selectors": [
                "h1"
            ]
        }
    ]
}

spider_tool = SpiderTool(
    website_url='https://spider.cloud',
    custom_params={"anti_bot": True, "proxy_enabled": True, "metadata": True, "css_extraction_map": css_extraction_map}
)
### Response (extracted text will be in the metadata)

"css_extracted": {
    "headers": [
        "The Web Crawler for AI Agents and LLMs!"
    ]
}
```
## Agent setup

```yaml
researcher:
  role: >
    You're a researcher tasked with researching a website and its content (use crawl mode).
    The website to crawl is: {website_url}.
```

## Arguments
Reference in New Issue
Block a user