Merge pull request #53 from googio/adding-serply-api

Adding Serply API
João Moura
2024-07-14 13:37:03 -07:00
committed by GitHub
8 changed files with 512 additions and 2 deletions


@@ -24,10 +24,15 @@ from .tools import (
ScrapeWebsiteTool,
SeleniumScrapingTool,
SerperDevTool,
SerplyWebSearchTool,
SerplyNewsSearchTool,
SerplyScholarSearchTool,
SerplyWebpageToMarkdownTool,
SerplyJobSearchTool,
TXTSearchTool,
WebsiteSearchTool,
XMLSearchTool,
YoutubeChannelSearchTool,
YoutubeVideoSearchTool,
)
from .tools.base_tool import BaseTool, Tool, tool


@@ -25,10 +25,15 @@ from .scrape_element_from_website.scrape_element_from_website import (
from .scrape_website_tool.scrape_website_tool import ScrapeWebsiteTool
from .selenium_scraping_tool.selenium_scraping_tool import SeleniumScrapingTool
from .serper_dev_tool.serper_dev_tool import SerperDevTool
from .serply_api_tool.serply_web_search_tool import SerplyWebSearchTool
from .serply_api_tool.serply_news_search_tool import SerplyNewsSearchTool
from .serply_api_tool.serply_scholar_search_tool import SerplyScholarSearchTool
from .serply_api_tool.serply_webpage_to_markdown_tool import SerplyWebpageToMarkdownTool
from .serply_api_tool.serply_job_search_tool import SerplyJobSearchTool
from .txt_search_tool.txt_search_tool import TXTSearchTool
from .website_search.website_search_tool import WebsiteSearchTool
from .xml_search_tool.xml_search_tool import XMLSearchTool
from .youtube_channel_search_tool.youtube_channel_search_tool import (
YoutubeChannelSearchTool,
)
from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool


@@ -0,0 +1,117 @@
# Serply API Documentation
## Description
This tool is designed to perform web, news, scholar, and job searches for a specified query across the internet. It utilizes the [Serply.io](https://serply.io) API to fetch and display the most relevant search results based on the query provided by the user.
## Installation
To incorporate this tool into your project, follow the installation instructions below:
```shell
pip install 'crewai[tools]'
```
## Examples
### Web Search
The following example demonstrates how to initialize the tool and execute a web search with a given query:
```python
from crewai_tools import SerplyWebSearchTool
# Initialize the tool for internet searching capabilities
tool = SerplyWebSearchTool()
# increase search limits to 100 results
tool = SerplyWebSearchTool(limit=100)
# change results language (fr - French)
tool = SerplyWebSearchTool(hl="fr")
```
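Once initialized, the tool can be invoked directly. Below is a minimal sketch, assuming the `SERPLY_API_KEY` environment variable is set and passing the `search_query` argument defined in the tool's schema (the query itself is illustrative):
```python
# Run a web search; requires SERPLY_API_KEY in the environment
tool = SerplyWebSearchTool()
results = tool.run(search_query="latest AI agent frameworks")  # illustrative query
print(results)
```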
### News Search
The following example demonstrates how to initialize the tool and execute a news search with a given query:
```python
from crewai_tools import SerplyNewsSearchTool
# Initialize the tool for internet searching capabilities
tool = SerplyNewsSearchTool()
# change country news (JP - Japan)
tool = SerplyNewsSearchTool(proxy_location="JP")
```
### Scholar Search
The following example demonstrates how to initialize the tool and execute a scholar search with a given query:
```python
from crewai_tools import SerplyScholarSearchTool
# Initialize the tool for internet searching capabilities
tool = SerplyScholarSearchTool()
# change country news (GB - Great Britain)
tool = SerplyScholarSearchTool(proxy_location="GB")
```
### Job Search
The following example demonstrates how to initialize the tool and search for jobs in the USA:
```python
from crewai_tools import SerplyJobSearchTool
# Initialize the tool for job search capabilities
tool = SerplyJobSearchTool()
```
### Web Page To Markdown
The following example demonstrates how to initialize the tool, fetch a web page, and convert it to markdown:
```python
from crewai_tools import SerplyWebpageToMarkdownTool
# Initialize the tool for webpage-to-markdown conversion
tool = SerplyWebpageToMarkdownTool()
# change country make request from (DE - Germany)
tool = SerplyWebpageToMarkdownTool(proxy_location="DE")
```
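Invocation follows the same pattern as the other tools, passing the `url` argument defined in the tool's schema. A minimal sketch (the URL below is a placeholder):
```python
# Fetch a page and convert it to markdown; requires SERPLY_API_KEY in the environment
markdown_text = tool.run(url="https://example.com")  # placeholder URL
print(markdown_text)
```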
### Combining Multiple Tools
The following example demonstrates how to combine tools: perform a Google search to find relevant articles, then convert those articles to markdown for easier extraction of key points.
```python
from crewai import Agent
from crewai_tools import SerplyWebSearchTool, SerplyWebpageToMarkdownTool
search_tool = SerplyWebSearchTool()
convert_to_markdown = SerplyWebpageToMarkdownTool()
# Creating a senior researcher agent with memory and verbose mode
researcher = Agent(
role='Senior Researcher',
goal='Uncover groundbreaking technologies in {topic}',
verbose=True,
memory=True,
backstory=(
"Driven by curiosity, you're at the forefront of"
"innovation, eager to explore and share knowledge that could change"
"the world."
),
tools=[search_tool, convert_to_markdown],
allow_delegation=True
)
```
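To put the agent to work, it would typically be wired into a task and crew. Below is a minimal sketch that continues from the agent above, using the standard crewai `Task`/`Crew` API (the task description and topic are illustrative):
```python
from crewai import Crew, Task

# Illustrative task that exercises both tools through the researcher agent
research_task = Task(
    description="Find recent articles about {topic} and summarize their key points.",
    expected_output="A bullet-point summary of the most relevant findings.",
    agent=researcher,
)

crew = Crew(agents=[researcher], tasks=[research_task])
result = crew.kickoff(inputs={"topic": "AI agents"})  # illustrative topic
print(result)
```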
## Steps to Get Started
To effectively use the Serply tools, follow these steps:
1. **Package Installation**: Confirm that the `crewai[tools]` package is installed in your Python environment.
2. **API Key Acquisition**: Acquire a Serply API key by registering for a free account at [Serply.io](https://serply.io).
3. **Environment Configuration**: Store your obtained API key in an environment variable named `SERPLY_API_KEY` to facilitate its use by the tool.
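For example, in a Unix-like shell (the key value below is a placeholder):
```shell
export SERPLY_API_KEY="your-serply-api-key"
```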
## Conclusion
By integrating the Serply tools into Python projects, users gain the ability to conduct real-time web, news, scholar, and job searches directly from their applications. By adhering to the setup and usage guidelines provided, incorporating these tools into projects is streamlined and straightforward.


@@ -0,0 +1,75 @@
import os
import requests
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.rag.rag_tool import RagTool
class SerplyJobSearchToolSchema(BaseModel):
"""Input for Serply Scholar Search."""
search_query: str = Field(..., description="Mandatory search query you want to use to fetch jobs postings.")
class SerplyJobSearchTool(RagTool):
name: str = "Job Search"
    description: str = "A tool to perform a job search in the US with a search_query."
args_schema: Type[BaseModel] = SerplyJobSearchToolSchema
request_url: str = "https://api.serply.io/v1/job/search/"
proxy_location: Optional[str] = "US"
"""
    proxy_location: (str): Country to fetch job postings from.
        - Currently only US is supported
"""
headers: Optional[dict] = {}
def __init__(
self,
**kwargs
):
super().__init__(**kwargs)
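        # Serply authenticates via the X-API-KEY header, read from the SERPLY_API_KEY environment variable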
self.headers = {
"X-API-KEY": os.environ["SERPLY_API_KEY"],
"User-Agent": "crew-tools",
"X-Proxy-Location": self.proxy_location
}
def _run(
self,
**kwargs: Any,
) -> Any:
query_payload = {}
if "query" in kwargs:
query_payload["q"] = kwargs["query"]
elif "search_query" in kwargs:
query_payload["q"] = kwargs["search_query"]
# build the url
url = f"{self.request_url}{urlencode(query_payload)}"
response = requests.request("GET", url, headers=self.headers)
jobs = response.json().get("jobs", "")
if not jobs:
return ""
string = []
for job in jobs:
try:
string.append('\n'.join([
f"Position: {job['position']}",
f"Employer: {job['employer']}",
f"Location: {job['location']}",
f"Link: {job['link']}",
f"""Highest: {', '.join([h for h in job['highlights']])}""",
f"Is Remote: {job['is_remote']}",
f"Is Hybrid: {job['is_remote']}",
"---"
]))
except KeyError:
continue
content = '\n'.join(string)
return f"\nSearch results: {content}\n"


@@ -0,0 +1,81 @@
import os
import requests
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class SerplyNewsSearchToolSchema(BaseModel):
"""Input for Serply News Search."""
search_query: str = Field(..., description="Mandatory search query you want to use to fetch news articles")
class SerplyNewsSearchTool(BaseTool):
name: str = "News Search"
    description: str = "A tool to perform a news article search with a search_query."
args_schema: Type[BaseModel] = SerplyNewsSearchToolSchema
search_url: str = "https://api.serply.io/v1/news/"
proxy_location: Optional[str] = "US"
headers: Optional[dict] = {}
limit: Optional[int] = 10
def __init__(
self,
limit: Optional[int] = 10,
proxy_location: Optional[str] = "US",
**kwargs
):
"""
param: limit (int): The maximum number of results to return [10-100, defaults to 10]
        proxy_location: (str): Where to get news from, for country-specific results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
"""
super().__init__(**kwargs)
self.limit = limit
self.proxy_location = proxy_location
self.headers = {
"X-API-KEY": os.environ["SERPLY_API_KEY"],
"User-Agent": "crew-tools",
"X-Proxy-Location": proxy_location
}
def _run(
self,
**kwargs: Any,
) -> Any:
# build query parameters
query_payload = {}
if "query" in kwargs:
query_payload["q"] = kwargs["query"]
elif "search_query" in kwargs:
query_payload["q"] = kwargs["search_query"]
# build the url
url = f"{self.search_url}{urlencode(query_payload)}"
response = requests.request("GET", url, headers=self.headers)
results = response.json()
if "entries" in results:
results = results['entries']
string = []
for result in results[:self.limit]:
try:
                    # follow redirects to resolve the final article URL
r = requests.get(result['link'])
final_link = r.history[-1].headers['Location']
string.append('\n'.join([
f"Title: {result['title']}",
f"Link: {final_link}",
f"Source: {result['source']['title']}",
f"Published: {result['published']}",
"---"
]))
except KeyError:
continue
content = '\n'.join(string)
return f"\nSearch results: {content}\n"
else:
return results


@@ -0,0 +1,86 @@
import os
import requests
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class SerplyScholarSearchToolSchema(BaseModel):
"""Input for Serply Scholar Search."""
search_query: str = Field(..., description="Mandatory search query you want to use to fetch scholarly literature")
class SerplyScholarSearchTool(BaseTool):
name: str = "Scholar Search"
    description: str = "A tool to perform a scholarly literature search with a search_query."
args_schema: Type[BaseModel] = SerplyScholarSearchToolSchema
search_url: str = "https://api.serply.io/v1/scholar/"
hl: Optional[str] = "us"
proxy_location: Optional[str] = "US"
headers: Optional[dict] = {}
def __init__(
self,
hl: str = "us",
proxy_location: Optional[str] = "US",
**kwargs
):
"""
param: hl (str): host Language code to display results in
(reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
        proxy_location: (str): Where to get scholarly literature from, for country-specific results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
"""
super().__init__(**kwargs)
self.hl = hl
self.proxy_location = proxy_location
self.headers = {
"X-API-KEY": os.environ["SERPLY_API_KEY"],
"User-Agent": "crew-tools",
"X-Proxy-Location": proxy_location
}
def _run(
self,
**kwargs: Any,
) -> Any:
query_payload = {
"hl": self.hl
}
if "query" in kwargs:
query_payload["q"] = kwargs["query"]
elif "search_query" in kwargs:
query_payload["q"] = kwargs["search_query"]
# build the url
url = f"{self.search_url}{urlencode(query_payload)}"
response = requests.request("GET", url, headers=self.headers)
articles = response.json().get("articles", "")
if not articles:
return ""
string = []
for article in articles:
try:
if "doc" in article:
link = article['doc']['link']
else:
link = article['link']
authors = [author['name'] for author in article['author']['authors']]
string.append('\n'.join([
f"Title: {article['title']}",
f"Link: {link}",
f"Description: {article['description']}",
f"Cite: {article['cite']}",
f"Authors: {', '.join(authors)}",
"---"
]))
except KeyError:
continue
content = '\n'.join(string)
return f"\nSearch results: {content}\n"


@@ -0,0 +1,93 @@
import os
import requests
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class SerplyWebSearchToolSchema(BaseModel):
"""Input for Serply Web Search."""
    search_query: str = Field(..., description="Mandatory search query you want to use for Google search")
class SerplyWebSearchTool(BaseTool):
name: str = "Google Search"
    description: str = "A tool to perform a Google search with a search_query."
args_schema: Type[BaseModel] = SerplyWebSearchToolSchema
search_url: str = "https://api.serply.io/v1/search/"
hl: Optional[str] = "us"
limit: Optional[int] = 10
device_type: Optional[str] = "desktop"
proxy_location: Optional[str] = "US"
query_payload: Optional[dict] = {}
headers: Optional[dict] = {}
def __init__(
self,
hl: str = "us",
limit: int = 10,
device_type: str = "desktop",
proxy_location: str = "US",
**kwargs
):
"""
param: hl (str): host Language code to display results in
(reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
param: limit (int): The maximum number of results to return [10-100, defaults to 10]
param: device_type (str): desktop/mobile results (defaults to desktop)
proxy_location: (str): Where to perform the search, specifically for local/regional results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
"""
super().__init__(**kwargs)
self.limit = limit
self.device_type = device_type
self.proxy_location = proxy_location
# build query parameters
self.query_payload = {
"num": limit,
"gl": proxy_location.upper(),
"hl": hl.lower()
}
self.headers = {
"X-API-KEY": os.environ["SERPLY_API_KEY"],
"X-User-Agent": device_type,
"User-Agent": "crew-tools",
"X-Proxy-Location": proxy_location
}
def _run(
self,
**kwargs: Any,
) -> Any:
if "query" in kwargs:
self.query_payload["q"] = kwargs["query"]
elif "search_query" in kwargs:
self.query_payload["q"] = kwargs["search_query"]
# build the url
url = f"{self.search_url}{urlencode(self.query_payload)}"
response = requests.request("GET", url, headers=self.headers)
results = response.json()
if "results" in results:
results = results['results']
string = []
for result in results:
try:
string.append('\n'.join([
f"Title: {result['title']}",
f"Link: {result['link']}",
f"Description: {result['description'].strip()}",
"---"
]))
except KeyError:
continue
content = '\n'.join(string)
return f"\nSearch results: {content}\n"
else:
return results


@@ -0,0 +1,48 @@
import os
import requests
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.rag.rag_tool import RagTool
class SerplyWebpageToMarkdownToolSchema(BaseModel):
"""Input for Serply Scholar Search."""
url: str = Field(..., description="Mandatory url you want to use to fetch and convert to markdown")
class SerplyWebpageToMarkdownTool(RagTool):
name: str = "Webpage to Markdown"
    description: str = "A tool to convert a webpage to markdown, making it easier for LLMs to understand"
args_schema: Type[BaseModel] = SerplyWebpageToMarkdownToolSchema
request_url: str = "https://api.serply.io/v1/request"
proxy_location: Optional[str] = "US"
headers: Optional[dict] = {}
def __init__(
self,
proxy_location: Optional[str] = "US",
**kwargs
):
"""
        proxy_location: (str): Country to route the request through, for country-specific results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
"""
super().__init__(**kwargs)
self.proxy_location = proxy_location
self.headers = {
"X-API-KEY": os.environ["SERPLY_API_KEY"],
"User-Agent": "crew-tools",
"X-Proxy-Location": proxy_location
}
def _run(
self,
**kwargs: Any,
) -> Any:
data = {
"url": kwargs["url"],
"method": "GET",
"response_type": "markdown"
}
response = requests.request("POST", self.request_url, headers=self.headers, json=data)
return response.text