fix: web scraper concatenating words

This commit is contained in:
Ernest Poletaev
2024-10-25 22:03:59 +07:00
parent 9eac65f9f6
commit 96e52767ad

View File

@@ -1,4 +1,5 @@
import os
import re
from typing import Any, Optional, Type
import requests
@@ -67,7 +68,6 @@ class ScrapeWebsiteTool(BaseTool):
page.encoding = page.apparent_encoding
parsed = BeautifulSoup(page.text, "html.parser")
text = parsed.get_text()
text = "\n".join([i for i in text.split("\n") if i.strip() != ""])
text = " ".join([i for i in text.split(" ") if i.strip() != ""])
text = parsed.get_text(" ")
text = re.sub('\s+', ' ', text)
return text