spider tool
src/crewai_tools/tools/spider_crawl_tool/README.md (new file, 27 lines)
@@ -0,0 +1,27 @@

# SpiderTool
## Description
[Spider](https://spider.cloud/?ref=crewai) is the [fastest](https://github.com/spider-rs/spider/blob/main/benches/BENCHMARKS.md#benchmark-results) open source scraper and crawler that returns LLM-ready data. It converts any website into pure HTML, markdown, metadata or text while enabling you to crawl with custom actions using AI.
## Installation
To use the Spider API you need to install the [Spider SDK](https://pypi.org/project/spider-client/) along with the crewai[tools] SDK:
```shell
pip install spider-client 'crewai[tools]'
```
## Example
This example shows how you can use the Spider tool to enable your agent to scrape and crawl websites. The data returned from the Spider API is already LLM-ready, so no cleaning is needed.
```python
from crewai_tools import SpiderTool
tool = SpiderTool()
```
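
Beyond instantiation, the tool is typically handed to an agent. The snippet below is a minimal sketch, assuming the standard `Agent`, `Task`, and `Crew` classes from the `crewai` package; the role, goal, backstory, and task wording are illustrative placeholders:

```python
from crewai import Agent, Task, Crew
from crewai_tools import SpiderTool

# Hypothetical agent that uses SpiderTool to gather page content.
searcher = Agent(
    role="Web Researcher",
    goal="Collect LLM-ready content from websites",
    backstory="An agent that scrapes and crawls sites on demand.",
    tools=[SpiderTool()],
)

research = Task(
    description="Scrape https://spider.cloud and summarize what the service offers.",
    expected_output="A short summary of the page content.",
    agent=searcher,
)

crew = Crew(agents=[searcher], tasks=[research])
result = crew.kickoff()
print(result)
```

The agent decides when to invoke the tool based on the task description, so the tool only needs to be listed in `tools`.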
## Arguments
- `api_key`: Optional. Specifies the Spider API key. If not specified, the tool looks for `SPIDER_API_KEY` in the environment variables.
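
For illustration, both ways of supplying the key are sketched below; this assumes the environment-variable fallback is handled by the underlying `spider` client, and the key values are placeholders:

```python
import os

from crewai_tools import SpiderTool

# Option 1: pass the key explicitly (placeholder value).
tool = SpiderTool(api_key="your-spider-api-key")

# Option 2: rely on the SPIDER_API_KEY environment variable fallback.
os.environ["SPIDER_API_KEY"] = "your-spider-api-key"
tool = SpiderTool()
```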
src/crewai_tools/tools/spider_crawl_tool/spider_tool.py (new file, 44 lines)
@@ -0,0 +1,44 @@

```python
from typing import Optional, Any, Type, Dict, Literal

from pydantic.v1 import BaseModel, Field

from crewai_tools.tools.base_tool import BaseTool


class SpiderToolSchema(BaseModel):
    url: str = Field(description="Website URL")
    params: Optional[Dict[str, Any]] = Field(
        default={"return_format": "markdown"},
        description="Specified params, see https://spider.cloud/docs/api for all available params",
    )
    mode: Optional[Literal["scrape", "crawl"]] = Field(
        default="scrape", description="Mode, either `scrape` or `crawl` the url"
    )


class SpiderTool(BaseTool):
    name: str = "Spider scrape & crawl tool"
    description: str = "Scrape & Crawl any url and return LLM-ready data."
    args_schema: Type[BaseModel] = SpiderToolSchema
    api_key: Optional[str] = None
    spider: Optional[Any] = None

    def __init__(self, api_key: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        try:
            from spider import Spider  # type: ignore
        except ImportError:
            raise ImportError(
                "`spider-client` package not found, please run `pip install spider-client`"
            )

        self.spider = Spider(api_key=api_key)

    def _run(
        self,
        url: str,
        params: Optional[Dict[str, Any]] = None,
        mode: Optional[Literal["scrape", "crawl"]] = "scrape",
    ):
        if mode not in ("scrape", "crawl"):
            raise ValueError(
                "Unknown mode in `mode` parameter; allowed modes are `scrape` and `crawl`"
            )

        # Fall back to markdown output when no params are given.
        if params is None:
            params = {"return_format": "markdown"}

        # Dispatch to the matching Spider client call for the requested mode.
        action = (
            self.spider.scrape_url if mode == "scrape" else self.spider.crawl_url
        )
        return action(url=url, params=params)
```
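
For a quick smoke test outside a crew, the tool can be invoked directly through the `run` method it inherits from `BaseTool`. A minimal sketch, assuming `SPIDER_API_KEY` is set in the environment; the `limit` crawl parameter is taken from the Spider API docs and the URL is illustrative:

```python
from crewai_tools import SpiderTool

tool = SpiderTool()  # picks up SPIDER_API_KEY from the environment

# Scrape a single page as markdown (the schema's default return_format).
page = tool.run(url="https://spider.cloud", mode="scrape")

# Crawl a site; `limit` caps the number of pages fetched (assumed Spider API param).
pages = tool.run(
    url="https://spider.cloud",
    mode="crawl",
    params={"return_format": "markdown", "limit": 3},
)
print(page, pages)
```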