This commit is contained in:
WilliamEspegren
2024-05-23 12:03:48 +02:00
parent 70b5a3ab85
commit 5b7276c0bb
3 changed files with 45 additions and 38 deletions

View File

@@ -1,6 +1,7 @@
from typing import Optional, Any, Type, Dict, Literal
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
import requests
class SpiderFullParams(BaseModel):
request: Optional[str] = Field(description="The request type to perform. Possible values are `http`, `chrome`, and `smart`.")
@@ -64,12 +65,21 @@ class SpiderFullTool(BaseTool):
)
if params is None:
print("PARAMS IT NONE")
params = SpiderFullParams()
print(params)
action = self.spider.scrape_url if mode == "scrape" else self.spider.crawl_url
spider_docs = action(url=url, params=params.dict())
response = action(url=url, params=params.dict())
# Debugging: Print the response content
print(f"Response status code: {response.status_code}")
print(f"Response content: {response.text}")
try:
spider_docs = response.json()
except requests.exceptions.JSONDecodeError as e:
print(f"JSONDecodeError: {e}")
spider_docs = {"error": "Failed to decode JSON response"}
return spider_docs
tool = SpiderFullTool()
tool._run(url="https://spider.cloud")

View File

@@ -25,10 +25,15 @@ class SpiderTool(BaseTool):
self.spider = Spider(api_key=api_key)
def _run(self, url: str, params: Optional[Dict[str, Any]] = None, mode: Optional[Literal["scrape", "crawl"]] = "scrape"):
if mode != "scrape" and mode != "crawl":
def _run(
self,
url: str,
params: Optional[Dict[str, any]] = None,
mode: Optional[Literal["scrape", "crawl"]] = "scrape"
):
if mode not in ["scrape", "crawl"]:
raise ValueError(
"Unknown mode in `mode` parameter, `scrape` or `crawl` is the allowed modes"
"Unknown mode in `mode` parameter, `scrape` or `crawl` are the allowed modes"
)
if params is None or params == {}:

View File

@@ -1,38 +1,30 @@
import os
from crewai_tools.tools.spider_full_tool.spider_full_tool import SpiderFullTool, SpiderFullParams
from crewai import Agent, Task, Crew
def test_spider_tool():
spider_tool = SpiderFullTool()
def test_spider_full_tool():
spider_tool = SpiderFullTool(api_key="your_api_key")
url = "https://spider.cloud"
params = SpiderFullParams(
return_format="markdown"
request="http",
limit=1,
depth=1,
cache=True,
locale="en-US",
stealth=True,
headers={"User-Agent": "test-agent"},
metadata=False,
viewport="800x600",
encoding="UTF-8",
subdomains=False,
user_agent="test-agent",
store_data=False,
proxy_enabled=False,
query_selector=None,
full_resources=False,
request_timeout=30,
run_in_background=False
)
docs = spider_tool._run("https://spider.cloud", params=params)
docs = spider_tool._run(url=url, params=params)
print(docs)
# searcher = Agent(
# role="Web Research Expert",
# goal="Find related information from specific URL's",
# backstory="An expert web researcher that uses the web extremely well",
# tools=[spider_tool],
# verbose=True
# )
# summarize_spider = Task(
# description="Summarize the content of spider.cloud",
# expected_output="A summary that goes over what spider does",
# agent=searcher
# )
# crew = Crew(
# agents=[searcher],
# tasks=[summarize_spider],
# verbose=2
# )
# crew.kickoff()
if __name__ == "__main__":
test_spider_tool()
test_spider_full_tool()