Fix Serper tool request payload (country/location/locale) and scraper text decoding

This commit is contained in:
João Moura
2024-07-19 00:28:27 -04:00
parent d8c98f2e64
commit 0386120a5a
2 changed files with 28 additions and 19 deletions

View File

@@ -49,9 +49,11 @@ class ScrapeWebsiteTool(BaseTool):
headers=self.headers,
cookies=self.cookies if self.cookies else {}
)
parsed = BeautifulSoup(page.content, "html.parser")
page.encoding = page.apparent_encoding
parsed = BeautifulSoup(page.text, "html.parser")
text = parsed.get_text()
text = '\n'.join([i for i in text.split('\n') if i.strip() != ''])
text = ' '.join([i for i in text.split(' ') if i.strip() != ''])
return text

View File

@@ -1,3 +1,4 @@
import datetime
import os
import json
import requests
@@ -23,11 +24,11 @@ class SerperDevTool(BaseTool):
description: str = "A tool that can be used to search the internet with a search_query."
args_schema: Type[BaseModel] = SerperDevToolSchema
search_url: str = "https://google.serper.dev/search"
country: Optional[str] = None
location: Optional[str] = None
locale: Optional[str] = None
n_results: int = Field(default=10, description="Number of search results to return")
save_file: bool = Field(default=False, description="Flag to determine whether to save the results to a file")
country: Optional[str] = ''
location: Optional[str] = ''
locale: Optional[str] = ''
n_results: int = 10
save_file: bool = False
def _run(
self,
@@ -39,9 +40,13 @@ class SerperDevTool(BaseTool):
n_results = kwargs.get('n_results', self.n_results)
payload = { "q": search_query, "num": n_results }
payload["gl"] = self.country if self.country
payload["location"] = self.country if self.location
payload["hl"] = self.country if self.locale
if self.country != '':
payload["gl"] = self.country
if self.location != '':
payload["location"] = self.location
if self.locale != '':
payload["hl"] = self.locale
payload = json.dumps(payload)
@@ -49,8 +54,10 @@ class SerperDevTool(BaseTool):
'X-API-KEY': os.environ['SERPER_API_KEY'],
'content-type': 'application/json'
}
response = requests.request("POST", self.search_url, headers=headers, data=payload)
results = response.json()
if 'organic' in results:
results = results['organic'][:self.n_results]
string = []