mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-28 09:38:17 +00:00
Adding timeout to scraping website tool
This commit is contained in:
@@ -44,7 +44,12 @@ class ScrapeWebsiteTool(BaseTool):
|
|||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> Any:
|
) -> Any:
|
||||||
website_url = kwargs.get('website_url', self.website_url)
|
website_url = kwargs.get('website_url', self.website_url)
|
||||||
page = requests.get(website_url, headers=self.headers, cookies=self.cookies if self.cookies else {})
|
page = requests.get(
|
||||||
|
website_url,
|
||||||
|
timeout=15,
|
||||||
|
headers=self.headers,
|
||||||
|
cookies=self.cookies if self.cookies else {}
|
||||||
|
)
|
||||||
parsed = BeautifulSoup(page.content, "html.parser")
|
parsed = BeautifulSoup(page.content, "html.parser")
|
||||||
text = parsed.get_text()
|
text = parsed.get_text()
|
||||||
text = '\n'.join([i for i in text.split('\n') if i.strip() != ''])
|
text = '\n'.join([i for i in text.split('\n') if i.strip() != ''])
|
||||||
|
|||||||
Reference in New Issue
Block a user