@@ -1,6 +1,7 @@
 from typing import Optional, Any, Type, Dict, Literal
 from pydantic.v1 import BaseModel, Field
 from crewai_tools.tools.base_tool import BaseTool
+import requests

 class SpiderFullParams(BaseModel):
     request: Optional[str] = Field(description="The request type to perform. Possible values are `http`, `chrome`, and `smart`.")
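
Note: the added `import requests` supports the response handling introduced in the next hunk. For reference, a minimal sketch of how a `pydantic.v1` model like `SpiderFullParams` turns its fields into the plain dict handed to the Spider client; `DemoParams` and its fields are illustrative stand-ins, not part of this commit:

    from typing import Optional
    from pydantic.v1 import BaseModel, Field

    class DemoParams(BaseModel):
        # Hypothetical subset of SpiderFullParams, for illustration only
        request: Optional[str] = Field(None, description="`http`, `chrome`, or `smart`")
        limit: Optional[int] = Field(None, description="Maximum number of pages to fetch")

    params = DemoParams(request="http", limit=1)
    # The tool calls params.dict() before sending params to the Spider API
    print(params.dict())  # {'request': 'http', 'limit': 1}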
@@ -64,12 +65,21 @@ class SpiderFullTool(BaseTool):
         )

         if params is None:
             print("PARAMS IS NONE")
             params = SpiderFullParams()
         print(params)

         action = self.spider.scrape_url if mode == "scrape" else self.spider.crawl_url
-        spider_docs = action(url=url, params=params.dict())
+        response = action(url=url, params=params.dict())
+
+        # Debugging: print the response content
+        print(f"Response status code: {response.status_code}")
+        print(f"Response content: {response.text}")
+
+        try:
+            spider_docs = response.json()
+        except requests.exceptions.JSONDecodeError as e:
+            print(f"JSONDecodeError: {e}")
+            spider_docs = {"error": "Failed to decode JSON response"}

         return spider_docs
+
+tool = SpiderFullTool()
+tool._run(url="https://spider.cloud")
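
Note: the hunk above replaces the direct `spider_docs = action(...)` call with a two-step flow: capture the raw response, then decode it defensively with `requests.exceptions.JSONDecodeError` (available in requests 2.27+). A standalone sketch of the same pattern, assuming a plain `requests.get` rather than the Spider client:

    import requests

    def fetch_json(url: str) -> dict:
        """Fetch a URL and decode JSON, falling back to an error dict as the hunk above does."""
        response = requests.get(url, timeout=30)
        print(f"Response status code: {response.status_code}")
        try:
            return response.json()
        except requests.exceptions.JSONDecodeError as e:
            return {"error": f"Failed to decode JSON response: {e}"}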
@@ -25,10 +25,15 @@ class SpiderTool(BaseTool):

         self.spider = Spider(api_key=api_key)

-    def _run(self, url: str, params: Optional[Dict[str, Any]] = None, mode: Optional[Literal["scrape", "crawl"]] = "scrape"):
-        if mode != "scrape" and mode != "crawl":
+    def _run(
+        self,
+        url: str,
+        params: Optional[Dict[str, Any]] = None,
+        mode: Optional[Literal["scrape", "crawl"]] = "scrape"
+    ):
+        if mode not in ["scrape", "crawl"]:
             raise ValueError(
-                "Unknown mode in `mode` parameter, `scrape` or `crawl` is the allowed modes"
+                "Unknown mode in `mode` parameter, `scrape` or `crawl` are the allowed modes"
             )

         if params is None or params == {}:
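
Note: the reformatted signature and the `mode not in [...]` check read well, but the allowed values now live in two places (the `Literal` annotation and the list). A sketch of one way to keep them in sync with `typing.get_args`; this is a suggestion, not part of the commit:

    from typing import Literal, get_args

    Mode = Literal["scrape", "crawl"]

    def run(url: str, mode: Mode = "scrape") -> None:
        # get_args(Mode) returns ("scrape", "crawl"), so the runtime check
        # always matches the type annotation
        if mode not in get_args(Mode):
            raise ValueError(f"Unknown mode {mode!r}; allowed modes are {get_args(Mode)}")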
@@ -1,38 +1,30 @@
 import os
 from crewai_tools.tools.spider_full_tool.spider_full_tool import SpiderFullTool, SpiderFullParams
 from crewai import Agent, Task, Crew

-def test_spider_tool():
-    spider_tool = SpiderFullTool()
+def test_spider_full_tool():
+    spider_tool = SpiderFullTool(api_key="your_api_key")
     url = "https://spider.cloud"
     params = SpiderFullParams(
-        return_format="markdown"
+        request="http",
+        limit=1,
+        depth=1,
+        cache=True,
+        locale="en-US",
+        stealth=True,
+        headers={"User-Agent": "test-agent"},
+        metadata=False,
+        viewport="800x600",
+        encoding="UTF-8",
+        subdomains=False,
+        user_agent="test-agent",
+        store_data=False,
+        proxy_enabled=False,
+        query_selector=None,
+        full_resources=False,
+        request_timeout=30,
+        run_in_background=False
     )

-    docs = spider_tool._run("https://spider.cloud", params=params)
+    docs = spider_tool._run(url=url, params=params)
     print(docs)

     # searcher = Agent(
     #     role="Web Research Expert",
     #     goal="Find related information from specific URLs",
     #     backstory="An expert web researcher that uses the web extremely well",
     #     tools=[spider_tool],
     #     verbose=True
     # )

     # summarize_spider = Task(
     #     description="Summarize the content of spider.cloud",
     #     expected_output="A summary that goes over what spider does",
     #     agent=searcher
     # )

     # crew = Crew(
     #     agents=[searcher],
     #     tasks=[summarize_spider],
     #     verbose=2
     # )

     # crew.kickoff()

 if __name__ == "__main__":
-    test_spider_tool()
+    test_spider_full_tool()
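
Note: the test drives `SpiderFullTool` with a hard-coded placeholder key, so it hits the live API (or fails) on every run. A pytest-style variant that skips cleanly when no key is configured; the `SPIDER_API_KEY` variable name and the final assertion are assumptions, not part of the commit:

    import os
    import pytest
    from crewai_tools.tools.spider_full_tool.spider_full_tool import SpiderFullTool, SpiderFullParams

    @pytest.mark.skipif("SPIDER_API_KEY" not in os.environ, reason="needs a Spider API key")
    def test_spider_full_tool_live():
        tool = SpiderFullTool(api_key=os.environ["SPIDER_API_KEY"])
        params = SpiderFullParams(request="http", limit=1)
        docs = tool._run(url="https://spider.cloud", params=params)
        # _run returns decoded JSON on success or an error dict on decode failure
        assert docs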