From 5b7276c0bb75c2557ce0e7b6f08d316fb0a6426e Mon Sep 17 00:00:00 2001 From: WilliamEspegren Date: Thu, 23 May 2024 12:03:48 +0200 Subject: [PATCH] x --- .../spider_full_tool/spider_full_tool.py | 18 +++++-- .../tools/spider_tool/spider_tool.py | 11 ++-- tests/spider_full_tool_test.py | 54 ++++++++----------- 3 files changed, 45 insertions(+), 38 deletions(-) diff --git a/src/crewai_tools/tools/spider_full_tool/spider_full_tool.py b/src/crewai_tools/tools/spider_full_tool/spider_full_tool.py index e1041b701..5d8ea6eda 100644 --- a/src/crewai_tools/tools/spider_full_tool/spider_full_tool.py +++ b/src/crewai_tools/tools/spider_full_tool/spider_full_tool.py @@ -1,6 +1,7 @@ from typing import Optional, Any, Type, Dict, Literal from pydantic.v1 import BaseModel, Field from crewai_tools.tools.base_tool import BaseTool +import requests class SpiderFullParams(BaseModel): request: Optional[str] = Field(description="The request type to perform. Possible values are `http`, `chrome`, and `smart`.") @@ -64,12 +65,21 @@ class SpiderFullTool(BaseTool): ) if params is None: + print("PARAMS IS NONE") params = SpiderFullParams() + print(params) action = self.spider.scrape_url if mode == "scrape" else self.spider.crawl_url - spider_docs = action(url=url, params=params.dict()) + response = action(url=url, params=params.dict()) + + # Debugging: Print the response content + print(f"Response status code: {response.status_code}") + print(f"Response content: {response.text}") + + try: + spider_docs = response.json() + except requests.exceptions.JSONDecodeError as e: + print(f"JSONDecodeError: {e}") + spider_docs = {"error": "Failed to decode JSON response"} return spider_docs - -tool = SpiderFullTool() -tool._run(url="https://spider.cloud") \ No newline at end of file diff --git a/src/crewai_tools/tools/spider_tool/spider_tool.py b/src/crewai_tools/tools/spider_tool/spider_tool.py index 3495d55c9..e020a599a 100644 --- a/src/crewai_tools/tools/spider_tool/spider_tool.py +++ 
b/src/crewai_tools/tools/spider_tool/spider_tool.py @@ -25,10 +25,15 @@ class SpiderTool(BaseTool): self.spider = Spider(api_key=api_key) - def _run(self, url: str, params: Optional[Dict[str, Any]] = None, mode: Optional[Literal["scrape", "crawl"]] = "scrape"): - if mode != "scrape" and mode != "crawl": + def _run( + self, + url: str, + params: Optional[Dict[str, Any]] = None, + mode: Optional[Literal["scrape", "crawl"]] = "scrape" + ): + if mode not in ["scrape", "crawl"]: raise ValueError( - "Unknown mode in `mode` parameter, `scrape` or `crawl` is the allowed modes" + "Unknown mode in `mode` parameter, `scrape` or `crawl` are the allowed modes" ) if params is None or params == {}: diff --git a/tests/spider_full_tool_test.py b/tests/spider_full_tool_test.py index f00c0ec9c..220acfb49 100644 --- a/tests/spider_full_tool_test.py +++ b/tests/spider_full_tool_test.py @@ -1,38 +1,30 @@ -import os from crewai_tools.tools.spider_full_tool.spider_full_tool import SpiderFullTool, SpiderFullParams -from crewai import Agent, Task, Crew - -def test_spider_tool(): - spider_tool = SpiderFullTool() +def test_spider_full_tool(): + spider_tool = SpiderFullTool(api_key="your_api_key") + url = "https://spider.cloud" params = SpiderFullParams( - return_format="markdown" + request="http", + limit=1, + depth=1, + cache=True, + locale="en-US", + stealth=True, + headers={"User-Agent": "test-agent"}, + metadata=False, + viewport="800x600", + encoding="UTF-8", + subdomains=False, + user_agent="test-agent", + store_data=False, + proxy_enabled=False, + query_selector=None, + full_resources=False, + request_timeout=30, + run_in_background=False ) - - docs = spider_tool._run("https://spider.cloud", params=params) + docs = spider_tool._run(url=url, params=params) print(docs) - - # searcher = Agent( # role="Web Research Expert", # goal="Find related information from specific URL's", # backstory="An expert web researcher that uses the web extremely well", # tools=[spider_tool], # 
verbose=True - # ) - - # summarize_spider = Task( - # description="Summarize the content of spider.cloud", - # expected_output="A summary that goes over what spider does", - # agent=searcher - # ) - - # crew = Crew( - # agents=[searcher], - # tasks=[summarize_spider], - # verbose=2 - # ) - - # crew.kickoff() if __name__ == "__main__": - test_spider_tool() \ No newline at end of file + test_spider_full_tool()