mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-09 08:08:32 +00:00
fix: web scraper concatenate words
This commit is contained in:
@@ -1,4 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
|
import re
|
||||||
from typing import Any, Optional, Type
|
from typing import Any, Optional, Type
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
@@ -67,7 +68,6 @@ class ScrapeWebsiteTool(BaseTool):
|
|||||||
page.encoding = page.apparent_encoding
|
page.encoding = page.apparent_encoding
|
||||||
parsed = BeautifulSoup(page.text, "html.parser")
|
parsed = BeautifulSoup(page.text, "html.parser")
|
||||||
|
|
||||||
text = parsed.get_text()
|
text = parsed.get_text(" ")
|
||||||
text = "\n".join([i for i in text.split("\n") if i.strip() != ""])
|
text = re.sub('\s+', ' ', text)
|
||||||
text = " ".join([i for i in text.split(" ") if i.strip() != ""])
|
|
||||||
return text
|
return text
|
||||||
|
|||||||
Reference in New Issue
Block a user