Move off pydantic v1

2026-01-28 01:28:14 +00:00 · 2024-09-03 15:57:29 -04:00
parent d19bba72b0
commit 35fe222ca1
39 changed files with 752 additions and 550 deletions
--- a/src/crewai_tools/tools/spider_tool/spider_tool.py
+++ b/src/crewai_tools/tools/spider_tool/spider_tool.py
@@ -1,21 +1,25 @@
-from typing import Optional, Any, Type, Dict, Literal
-from pydantic.v1 import BaseModel, Field
+from typing import Any, Dict, Literal, Optional, Type
+
+from pydantic import BaseModel, Field
+
 from crewai_tools.tools.base_tool import BaseTool

+
 class SpiderToolSchema(BaseModel):
    url: str = Field(description="Website URL")
    params: Optional[Dict[str, Any]] = Field(
        description="Set additional params. Options include:\n"
-                    "- `limit`: Optional[int] - The maximum number of pages allowed to crawl per website. Remove the value or set it to `0` to crawl all pages.\n"
-                    "- `depth`: Optional[int] - The crawl limit for maximum depth. If `0`, no limit will be applied.\n"
-                    "- `metadata`: Optional[bool] - Boolean to include metadata or not. Defaults to `False` unless set to `True`. If the user wants metadata, include params.metadata = True.\n"
-                    "- `query_selector`: Optional[str] - The CSS query selector to use when extracting content from the markup.\n"
+        "- `limit`: Optional[int] - The maximum number of pages allowed to crawl per website. Remove the value or set it to `0` to crawl all pages.\n"
+        "- `depth`: Optional[int] - The crawl limit for maximum depth. If `0`, no limit will be applied.\n"
+        "- `metadata`: Optional[bool] - Boolean to include metadata or not. Defaults to `False` unless set to `True`. If the user wants metadata, include params.metadata = True.\n"
+        "- `query_selector`: Optional[str] - The CSS query selector to use when extracting content from the markup.\n"
    )
    mode: Literal["scrape", "crawl"] = Field(
        default="scrape",
-        description="Mode, the only two allowed modes are `scrape` or `crawl`. Use `scrape` to scrape a single page and `crawl` to crawl the entire website following subpages. These modes are the only allowed values even when ANY params is set."
+        description="Mode, the only two allowed modes are `scrape` or `crawl`. Use `scrape` to scrape a single page and `crawl` to crawl the entire website following subpages. These modes are the only allowed values even when ANY params is set.",
    )

+
 class SpiderTool(BaseTool):
    name: str = "Spider scrape & crawl tool"
    description: str = "Scrape & Crawl any url and return LLM-ready data."
@@ -26,11 +30,11 @@ class SpiderTool(BaseTool):
    def __init__(self, api_key: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        try:
-            from spider import Spider # type: ignore
+            from spider import Spider  # type: ignore
        except ImportError:
-           raise ImportError(
-               "`spider-client` package not found, please run `pip install spider-client`"
-           )
+            raise ImportError(
+                "`spider-client` package not found, please run `pip install spider-client`"
+            )

        self.spider = Spider(api_key=api_key)

@@ -38,7 +42,7 @@ class SpiderTool(BaseTool):
        self,
        url: str,
        params: Optional[Dict[str, Any]] = None,
-        mode: Optional[Literal["scrape", "crawl"]] = "scrape"
+        mode: Optional[Literal["scrape", "crawl"]] = "scrape",
    ):
        if mode not in ["scrape", "crawl"]:
            raise ValueError(
@@ -51,9 +55,7 @@ class SpiderTool(BaseTool):
        else:
            params = {"return_format": "markdown"}

-        action = (
-            self.spider.scrape_url if mode == "scrape" else self.spider.crawl_url
-        )
+        action = self.spider.scrape_url if mode == "scrape" else self.spider.crawl_url
        spider_docs = action(url=url, params=params)

        return spider_docs