adding google search, scholar, and news

This commit is contained in:
teampen
2024-06-10 21:15:21 -04:00
parent 53c7d815ae
commit d8b8edab08
6 changed files with 331 additions and 0 deletions

View File

@@ -23,4 +23,7 @@ from .tools import (
XMLSearchTool,
YoutubeChannelSearchTool,
YoutubeVideoSearchTool,
SerplyWebSearchTool,
SerplyNewsSearchTool,
SerplyScholarSearchTool
)

View File

@@ -21,3 +21,6 @@ from .website_search.website_search_tool import WebsiteSearchTool
from .xml_search_tool.xml_search_tool import XMLSearchTool
from .youtube_channel_search_tool.youtube_channel_search_tool import YoutubeChannelSearchTool
from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool
from .serply_api_tool.serply_web_search_tool import SerplyWebSearchTool
from .serply_api_tool.serply_news_search_tool import SerplyNewsSearchTool
from .serply_api_tool.serply_scholar_search_tool import SerplyScholarSearchTool

View File

@@ -0,0 +1,67 @@
# Serply API Documentation
## Description
This tool is designed to perform a web/news/scholar search for a specified query from a text's content across the internet. It utilizes the [Serply.io](https://serply.io) API to fetch and display the most relevant search results based on the query provided by the user.
## Installation
To incorporate this tool into your project, follow the installation instructions below:
```shell
pip install 'crewai[tools]'
```
## Examples
## Web Search
The following example demonstrates how to initialize the tool and execute a web search with a given query:
```python
from crewai_tools import SerplyWebSearchTool
# Initialize the tool for internet searching capabilities
tool = SerplyWebSearchTool()
# increase search limits to 100 results
tool = SerplyWebSearchTool(limit=100)
# change results language (fr - French)
tool = SerplyWebSearchTool(hl="fr")
```
## News Search
The following example demonstrates how to initialize the tool and execute a news search with a given query:
```python
from crewai_tools import SerplyNewsSearchTool
# Initialize the tool for internet searching capabilities
tool = SerplyNewsSearchTool()
# change country news (JP - Japan)
tool = SerplyNewsSearchTool(proxy_location="JP")
```
## Scholar Search
The following example demonstrates how to initialize the tool and execute a search for scholarly articles with a given query:
```python
from crewai_tools import SerplyScholarSearchTool
# Initialize the tool for internet searching capabilities
tool = SerplyScholarSearchTool()
# change the country to search from (GB - Great Britain)
tool = SerplyScholarSearchTool(proxy_location="GB")
```
## Steps to Get Started
To effectively use the `SerplyApiTool`, follow these steps:
1. **Package Installation**: Confirm that the `crewai[tools]` package is installed in your Python environment.
2. **API Key Acquisition**: Acquire a Serply API key by registering for a free account at [Serply.io](https://serply.io).
3. **Environment Configuration**: Store your obtained API key in an environment variable named `SERPLY_API_KEY` to facilitate its use by the tool.
## Conclusion
By integrating the `SerplyApiTool` into Python projects, users gain the ability to conduct real-time searches for web results, news, and scholarly articles across the internet directly from their applications. By adhering to the setup and usage guidelines provided, incorporating this tool into projects is streamlined and straightforward.

View File

@@ -0,0 +1,80 @@
import os
import requests
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class SerplyNewsSearchToolSchema(BaseModel):
    """Input for Serply News Search."""
    # Free-text query forwarded to the Serply news endpoint as the `q` parameter.
    search_query: str = Field(..., description="Mandatory search query you want to use to fetch news articles")
class SerplyNewsSearchTool(BaseTool):
    """Searches news articles through the Serply.io news API.

    Requires the ``SERPLY_API_KEY`` environment variable to be set.
    """

    name: str = "News Search"
    description: str = "A tool to perform News article search with a search_query."
    args_schema: Type[BaseModel] = SerplyNewsSearchToolSchema
    search_url: str = "https://api.serply.io/v1/news/"
    proxy_location: Optional[str] = "US"
    headers: Optional[dict] = {}
    limit: Optional[int] = 10

    def __init__(
        self,
        limit: Optional[int] = 10,
        proxy_location: Optional[str] = "US",
        **kwargs
    ):
        """
        param: limit (int): The maximum number of results to return [10-100, defaults to 10]
        param: proxy_location (str): Country to fetch news for, e.g. region-specific results.
            ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
        """
        super().__init__(**kwargs)
        self.limit = limit
        self.proxy_location = proxy_location
        # Raises KeyError early if SERPLY_API_KEY is not configured.
        self.headers = {
            "X-API-KEY": os.environ["SERPLY_API_KEY"],
            "User-Agent": "crew-tools",
            "X-Proxy-Location": proxy_location
        }

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        """Run the news search; accepts either `query` or `search_query`."""
        # Build query parameters.
        query_payload = {}
        if "query" in kwargs:
            query_payload["q"] = kwargs["query"]
        elif "search_query" in kwargs:
            query_payload["q"] = kwargs["search_query"]

        # Serply expects the URL-encoded query appended directly to the path.
        url = f"{self.search_url}{urlencode(query_payload)}"

        response = requests.request("GET", url, headers=self.headers)
        results = response.json()
        if "entries" not in results:
            # Unexpected payload (e.g. API error body) — return it unchanged.
            return results

        string = []
        for result in results["entries"][:self.limit]:
            try:
                # Follow the aggregator link to resolve the final article URL.
                r = requests.get(result["link"])
                # IndexError is possible when the link produced no redirects;
                # the original bare `except KeyError` let that propagate.
                final_link = r.history[-1].headers["Location"]
                string.append("\n".join([
                    f"Title: {result['title']}",
                    f"Link: {final_link}",
                    f"Source: {result['source']['title']}",
                    f"Published: {result['published']}",
                    "---"
                ]))
            except (KeyError, IndexError):
                # Skip malformed entries. The original used a bare `next`,
                # which only evaluates the builtin and does nothing.
                continue
        content = "\n".join(string)
        return f"\nSearch results: {content}\n"

View File

@@ -0,0 +1,85 @@
import os
import requests
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class SerplyScholarSearchToolSchema(BaseModel):
    """Input for Serply Scholar Search."""
    # Free-text query forwarded to the Serply scholar endpoint as the `q` parameter.
    search_query: str = Field(..., description="Mandatory search query you want to use to fetch scholarly literature")
class SerplyScholarSearchTool(BaseTool):
    """Searches scholarly literature through the Serply.io scholar API.

    Requires the ``SERPLY_API_KEY`` environment variable to be set.
    """

    name: str = "Scholar Search"
    # Fixed copy-paste bug: the original description said "News article search",
    # which would mislead an agent choosing between tools.
    description: str = "A tool to perform scholarly literature search with a search_query."
    args_schema: Type[BaseModel] = SerplyScholarSearchToolSchema
    search_url: str = "https://api.serply.io/v1/scholar/"
    hl: Optional[str] = "us"
    proxy_location: Optional[str] = "US"
    headers: Optional[dict] = {}

    def __init__(
        self,
        hl: str = "us",
        proxy_location: Optional[str] = "US",
        **kwargs
    ):
        """
        param: hl (str): host language code to display results in
            (reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
        param: proxy_location (str): Country to search from, for region-specific results.
            ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
        """
        super().__init__(**kwargs)
        self.hl = hl
        self.proxy_location = proxy_location
        # Raises KeyError early if SERPLY_API_KEY is not configured.
        self.headers = {
            "X-API-KEY": os.environ["SERPLY_API_KEY"],
            "User-Agent": "crew-tools",
            "X-Proxy-Location": proxy_location
        }

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        """Run the scholar search; accepts either `query` or `search_query`."""
        query_payload = {
            "hl": self.hl
        }
        if "query" in kwargs:
            query_payload["q"] = kwargs["query"]
        elif "search_query" in kwargs:
            query_payload["q"] = kwargs["search_query"]

        # Serply expects the URL-encoded query appended directly to the path.
        url = f"{self.search_url}{urlencode(query_payload)}"

        response = requests.request("GET", url, headers=self.headers)
        articles = response.json().get("articles", "")
        if not articles:
            return ""

        string = []
        for article in articles:
            try:
                # Some results nest the link under a "doc" object.
                if "doc" in article:
                    link = article["doc"]["link"]
                else:
                    link = article["link"]
                authors = [author["name"] for author in article["author"]["authors"]]
                string.append("\n".join([
                    f"Title: {article['title']}",
                    f"Link: {link}",
                    f"Description: {article['description']}",
                    f"Cite: {article['cite']}",
                    f"Authors: {', '.join(authors)}",
                    "---"
                ]))
            except KeyError:
                # Skip malformed entries. The original used a bare `next`,
                # which only evaluates the builtin and does nothing.
                continue
        content = "\n".join(string)
        return f"\nSearch results: {content}\n"

View File

@@ -0,0 +1,93 @@
import os
import requests
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class SerplyWebSearchToolSchema(BaseModel):
    """Input for Serply Web Search."""
    # Free-text query forwarded to the Serply search endpoint as the `q` parameter.
    search_query: str = Field(..., description="Mandatory search query you want to use to Google search")
class SerplyWebSearchTool(BaseTool):
    """Performs Google web searches through the Serply.io search API.

    Requires the ``SERPLY_API_KEY`` environment variable to be set.
    """

    name: str = "Google Search"
    description: str = "A tool to perform Google search with a search_query."
    args_schema: Type[BaseModel] = SerplyWebSearchToolSchema
    search_url: str = "https://api.serply.io/v1/search/"
    hl: Optional[str] = "us"
    limit: Optional[int] = 10
    device_type: Optional[str] = "desktop"
    proxy_location: Optional[str] = "US"
    query_payload: Optional[dict] = {}
    headers: Optional[dict] = {}

    def __init__(
        self,
        hl: str = "us",
        limit: int = 10,
        device_type: str = "desktop",
        proxy_location: str = "US",
        **kwargs
    ):
        """
        param: hl (str): host language code to display results in
            (reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
        param: limit (int): The maximum number of results to return [10-100, defaults to 10]
        param: device_type (str): desktop/mobile results (defaults to desktop)
        param: proxy_location (str): Where to perform the search, for local/regional results.
            ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
        """
        super().__init__(**kwargs)
        self.limit = limit
        self.device_type = device_type
        self.proxy_location = proxy_location
        # Base query parameters shared by every search this instance performs.
        self.query_payload = {
            "num": limit,
            "gl": proxy_location.upper(),
            "hl": hl.lower()
        }
        # Raises KeyError early if SERPLY_API_KEY is not configured.
        self.headers = {
            "X-API-KEY": os.environ["SERPLY_API_KEY"],
            "X-User-Agent": device_type,
            "User-Agent": "crew-tools",
            "X-Proxy-Location": proxy_location
        }

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        """Run the web search; accepts either `query` or `search_query`."""
        # Work on a copy: the original mutated self.query_payload, so the "q"
        # of one call leaked into later calls that passed no query.
        query_payload = dict(self.query_payload)
        if "query" in kwargs:
            query_payload["q"] = kwargs["query"]
        elif "search_query" in kwargs:
            query_payload["q"] = kwargs["search_query"]

        # Serply expects the URL-encoded query appended directly to the path.
        url = f"{self.search_url}{urlencode(query_payload)}"

        response = requests.request("GET", url, headers=self.headers)
        results = response.json()
        if "results" not in results:
            # Unexpected payload (e.g. API error body) — return it unchanged.
            return results

        string = []
        for result in results["results"]:
            try:
                string.append("\n".join([
                    f"Title: {result['title']}",
                    f"Link: {result['link']}",
                    f"Description: {result['description'].strip()}",
                    "---"
                ]))
            except KeyError:
                # Skip malformed entries. The original used a bare `next`,
                # which only evaluates the builtin and does nothing.
                continue
        content = "\n".join(string)
        return f"\nSearch results: {content}\n"