mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-11 00:58:30 +00:00
Improved readme based on recommendations—added more advanced usage examples
This commit is contained in:
from crewai_tools import SpiderTool

spider_tool = SpiderTool(api_key='YOUR_API_KEY')

# Initialize the tool with the website URL, so the agent can only scrape the content of the specified website
spider_tool = SpiderTool(website_url='https://spider.cloud')

# Pass in custom parameters, see below for more details
spider_tool = SpiderTool(
    website_url='https://spider.cloud',
    custom_params={"depth": 2, "anti_bot": True, "proxy_enabled": True}
)
# Advanced usage using css query selector to extract content
css_extraction_map = {
    "/": [  # pass in path (main index in this case)
        {
            "name": "headers",  # give it a name for this element
            "selectors": [
                "h1"
            ]
        }
    ]
}

spider_tool = SpiderTool(
    website_url='https://spider.cloud',
    custom_params={"anti_bot": True, "proxy_enabled": True, "metadata": True, "css_extraction_map": css_extraction_map}
)
### Response (extracted text will be in the metadata)

"css_extracted": {
    "headers": [
        "The Web Crawler for AI Agents and LLMs!"
    ]
}
```
## Agent setup

```yaml
researcher:
  role: >
    You're a researcher tasked with researching a website and its content (use crawl mode).
    The website to crawl is: {website_url}.
```

## Arguments
Reference in New Issue
Block a user