feat: add descriptive header to scraped website content output (#426)

* feat: add descriptive header to scraped website content output

* fix: correct typo in scraped website content header text
This commit is contained in:
Mike Plachta
2025-08-26 08:10:24 -07:00
committed by GitHub
parent dc039cfac8
commit 403bb7e208

View File

@@ -65,7 +65,8 @@ class ScrapeWebsiteTool(BaseTool):
page.encoding = page.apparent_encoding
parsed = BeautifulSoup(page.text, "html.parser")
-text = parsed.get_text(" ")
+text = "The following text is scraped website content:\n\n"
+text += parsed.get_text(" ")
text = re.sub("[ \t]+", " ", text)
text = re.sub("\\s+\n\\s+", "\n", text)
return text