From d8b8edab087fa7da4e085c2d83154f3c4a272d63 Mon Sep 17 00:00:00 2001
From: teampen <136991215+teampen@users.noreply.github.com>
Date: Mon, 10 Jun 2024 21:15:21 -0400
Subject: [PATCH] adding Google search, scholar, and news

---
 src/crewai_tools/__init__.py | 3 +
 src/crewai_tools/tools/__init__.py | 3 +
 .../tools/serply_api_tool/README.md | 67 +++++++++++++
 .../serply_news_search_tool.py | 80 ++++++++++++++++
 .../serply_scholar_search_tool.py | 85 +++++++++++++++++
 .../serply_api_tool/serply_web_search_tool.py | 93 +++++++++++++++++++
 6 files changed, 331 insertions(+)
 create mode 100644 src/crewai_tools/tools/serply_api_tool/README.md
 create mode 100644 src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py
 create mode 100644 src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py
 create mode 100644 src/crewai_tools/tools/serply_api_tool/serply_web_search_tool.py

diff --git a/src/crewai_tools/__init__.py b/src/crewai_tools/__init__.py
index faac5d37d..a9013b7ee 100644
--- a/src/crewai_tools/__init__.py
+++ b/src/crewai_tools/__init__.py
@@ -23,4 +23,7 @@ from .tools import (
     XMLSearchTool,
     YoutubeChannelSearchTool,
     YoutubeVideoSearchTool,
+    SerplyWebSearchTool,
+    SerplyNewsSearchTool,
+    SerplyScholarSearchTool
 )
\ No newline at end of file
diff --git a/src/crewai_tools/tools/__init__.py b/src/crewai_tools/tools/__init__.py
index 648671d97..138dbce17 100644
--- a/src/crewai_tools/tools/__init__.py
+++ b/src/crewai_tools/tools/__init__.py
@@ -21,3 +21,6 @@ from .website_search.website_search_tool import WebsiteSearchTool
 from .xml_search_tool.xml_search_tool import XMLSearchTool
 from .youtube_channel_search_tool.youtube_channel_search_tool import YoutubeChannelSearchTool
 from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool
+from .serply_api_tool.serply_web_search_tool import SerplyWebSearchTool
+from .serply_api_tool.serply_news_search_tool import SerplyNewsSearchTool
+from .serply_api_tool.serply_scholar_search_tool import SerplyScholarSearchTool
diff --git a/src/crewai_tools/tools/serply_api_tool/README.md b/src/crewai_tools/tools/serply_api_tool/README.md
new file mode 100644
index 000000000..fe439b28f
--- /dev/null
+++ b/src/crewai_tools/tools/serply_api_tool/README.md
@@ -0,0 +1,67 @@
# Serply API Documentation

## Description
This tool is designed to perform web, news, and scholar searches for a specified query across the internet. It utilizes the [Serply.io](https://serply.io) API to fetch and display the most relevant search results for the query provided by the user.
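
For example, once the package is installed and your API key is configured (both covered below), a typical end-to-end call looks like this. This is a minimal sketch: the `run` invocation assumes the standard crewAI `BaseTool` interface, and the query string is illustrative only.

```python
import os

# Assumption: a valid API key obtained from Serply.io
os.environ["SERPLY_API_KEY"] = "<your-serply-api-key>"

from crewai_tools import SerplyWebSearchTool

tool = SerplyWebSearchTool()

# `run` is the public entry point of crewAI tools; it forwards
# `search_query` to the tool's internal `_run` method
print(tool.run(search_query="latest advances in AI"))
```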

## Installation

To incorporate this tool into your project, follow the installation instructions below:
```shell
pip install 'crewai[tools]'
```

## Examples

## Web Search
The following example demonstrates how to initialize the tool and execute a web search with a given query:

```python
from crewai_tools import SerplyWebSearchTool

# Initialize the tool for internet searching capabilities
tool = SerplyWebSearchTool()

# increase the search limit to 100 results
tool = SerplyWebSearchTool(limit=100)


# change the results language (fr - French)
tool = SerplyWebSearchTool(hl="fr")
```

## News Search
The following example demonstrates how to initialize the tool and execute a news search with a given query:

```python
from crewai_tools import SerplyNewsSearchTool

# Initialize the tool for internet searching capabilities
tool = SerplyNewsSearchTool()

# change the news country (JP - Japan)
tool = SerplyNewsSearchTool(proxy_location="JP")
```

## Scholar Search
The following example demonstrates how to initialize the tool and execute a scholar search for a given query:

```python
from crewai_tools import SerplyScholarSearchTool

# Initialize the tool for internet searching capabilities
tool = SerplyScholarSearchTool()

# change the search country (GB - Great Britain)
tool = SerplyScholarSearchTool(proxy_location="GB")
```


## Steps to Get Started
To effectively use the Serply search tools, follow these steps:

1. **Package Installation**: Confirm that the `crewai[tools]` package is installed in your Python environment.
2. **API Key Acquisition**: Acquire a Serply API key by registering for a free account at [Serply.io](https://serply.io).
3. **Environment Configuration**: Store your obtained API key in an environment variable named `SERPLY_API_KEY` to facilitate its use by the tools.

## Conclusion
By integrating these Serply search tools into Python projects, users gain the ability to conduct real-time web, news, and scholar searches directly from their applications. By adhering to the setup and usage guidelines provided, incorporating the tools into projects is streamlined and straightforward.
diff --git a/src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py b/src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py
new file mode 100644
index 000000000..c1fef5a77
--- /dev/null
+++ b/src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py
@@ -0,0 +1,80 @@
import os
import requests
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool

class SerplyNewsSearchToolSchema(BaseModel):
    """Input for Serply News Search."""
    search_query: str = Field(..., description="Mandatory search query you want to use to fetch news articles")


class SerplyNewsSearchTool(BaseTool):
    name: str = "News Search"
    description: str = "A tool to perform a news article search with a search_query."
    args_schema: Type[BaseModel] = SerplyNewsSearchToolSchema
    search_url: str = "https://api.serply.io/v1/news/"
    proxy_location: Optional[str] = "US"
    headers: Optional[dict] = {}
    limit: Optional[int] = 10

    def __init__(
        self,
        limit: Optional[int] = 10,
        proxy_location: Optional[str] = "US",
        **kwargs
    ):
        """
        param: limit (int): The maximum number of results to return [10-100, defaults to 10]
        param: proxy_location (str): Which country to get news from, for country-specific results.
            ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
        """
        super().__init__(**kwargs)
        self.limit = limit
        self.proxy_location = proxy_location
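        # the Serply API key is read from the environment; this raises a
        # KeyError if SERPLY_API_KEY is not set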
        self.headers = {
            "X-API-KEY": os.environ["SERPLY_API_KEY"],
            "User-Agent": "crew-tools",
            "X-Proxy-Location": proxy_location
        }

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        # build query parameters
        query_payload = {}

        if "query" in kwargs:
            query_payload["q"] = kwargs["query"]
        elif "search_query" in kwargs:
            query_payload["q"] = kwargs["search_query"]

        # build the url
        url = f"{self.search_url}{urlencode(query_payload)}"

        response = requests.get(url, headers=self.headers)
        results = response.json()
        if "entries" in results:
            results = results['entries']
            string = []
            for result in results[:self.limit]:
                try:
                    # follow redirects to resolve the final article URL
                    r = requests.get(result['link'])
                    final_link = r.url
                    string.append('\n'.join([
                        f"Title: {result['title']}",
                        f"Link: {final_link}",
                        f"Source: {result['source']['title']}",
                        f"Published: {result['published']}",
                        "---"
                    ]))
                except KeyError:
                    # skip entries that are missing expected fields
                    continue

            content = '\n'.join(string)
            return f"\nSearch results: {content}\n"
        else:
            return results
diff --git a/src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py b/src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py
new file mode 100644
index 000000000..badc9950e
--- /dev/null
+++ b/src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py
@@ -0,0 +1,85 @@
import os
import requests
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool

class SerplyScholarSearchToolSchema(BaseModel):
    """Input for Serply Scholar Search."""
    search_query: str = Field(..., description="Mandatory search query you want to use to fetch scholarly literature")


class SerplyScholarSearchTool(BaseTool):
    name: str = "Scholar Search"
    description: str = "A tool to perform a scholarly literature search with a search_query."
    args_schema: Type[BaseModel] = SerplyScholarSearchToolSchema
    search_url: str = "https://api.serply.io/v1/scholar/"
    hl: Optional[str] = "us"
    proxy_location: Optional[str] = "US"
    headers: Optional[dict] = {}

    def __init__(
        self,
        hl: str = "us",
        proxy_location: Optional[str] = "US",
        **kwargs
    ):
        """
        param: hl (str): host language code to display results in
            (reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
        param: proxy_location (str): Where to search from, for country-specific results.
            ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
        """
        super().__init__(**kwargs)
        self.hl = hl
        self.proxy_location = proxy_location
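        # proxy_location is sent via the X-Proxy-Location header, which
        # Serply uses to return region-specific results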
        self.headers = {
            "X-API-KEY": os.environ["SERPLY_API_KEY"],
            "User-Agent": "crew-tools",
            "X-Proxy-Location": proxy_location
        }

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        query_payload = {
            "hl": self.hl
        }

        if "query" in kwargs:
            query_payload["q"] = kwargs["query"]
        elif "search_query" in kwargs:
            query_payload["q"] = kwargs["search_query"]

        # build the url
        url = f"{self.search_url}{urlencode(query_payload)}"

        response = requests.get(url, headers=self.headers)
        articles = response.json().get("articles", [])

        if not articles:
            return ""

        string = []
        for article in articles:
            try:
                # some articles nest the link under a "doc" object
                if "doc" in article:
                    link = article['doc']['link']
                else:
                    link = article['link']
                authors = [author['name'] for author in article['author']['authors']]
                string.append('\n'.join([
                    f"Title: {article['title']}",
                    f"Link: {link}",
                    f"Description: {article['description']}",
                    f"Cite: {article['cite']}",
                    f"Authors: {', '.join(authors)}",
                    "---"
                ]))
            except KeyError:
                # skip articles that are missing expected fields
                continue

        content = '\n'.join(string)
        return f"\nSearch results: {content}\n"
diff --git a/src/crewai_tools/tools/serply_api_tool/serply_web_search_tool.py b/src/crewai_tools/tools/serply_api_tool/serply_web_search_tool.py
new file mode 100644
index 000000000..5f146c673
--- /dev/null
+++ b/src/crewai_tools/tools/serply_api_tool/serply_web_search_tool.py
@@ -0,0 +1,93 @@
import os
import requests
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool


class SerplyWebSearchToolSchema(BaseModel):
    """Input for Serply Web Search."""
    search_query: str = Field(..., description="Mandatory search query you want to use to perform a Google search")


class SerplyWebSearchTool(BaseTool):
    name: str = "Google Search"
    description: str = "A tool to perform a Google search with a search_query."
    args_schema: Type[BaseModel] = SerplyWebSearchToolSchema
    search_url: str = "https://api.serply.io/v1/search/"
    hl: Optional[str] = "us"
    limit: Optional[int] = 10
    device_type: Optional[str] = "desktop"
    proxy_location: Optional[str] = "US"
    query_payload: Optional[dict] = {}
    headers: Optional[dict] = {}

    def __init__(
        self,
        hl: str = "us",
        limit: int = 10,
        device_type: str = "desktop",
        proxy_location: str = "US",
        **kwargs
    ):
        """
        param: hl (str): host language code to display results in
            (reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
        param: limit (int): The maximum number of results to return [10-100, defaults to 10]
        param: device_type (str): desktop/mobile results (defaults to desktop)
        param: proxy_location (str): Where to perform the search, for local/regional results.
            ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
        """
        super().__init__(**kwargs)

        self.limit = limit
        self.device_type = device_type
        self.proxy_location = proxy_location
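        # num, gl, and hl follow Google's query-parameter conventions:
        # result count, geolocation, and interface language, respectively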
        # build query parameters
        self.query_payload = {
            "num": limit,
            "gl": proxy_location.upper(),
            "hl": hl.lower()
        }
        self.headers = {
            "X-API-KEY": os.environ["SERPLY_API_KEY"],
            "X-User-Agent": device_type,
            "User-Agent": "crew-tools",
            "X-Proxy-Location": proxy_location
        }

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        # copy the instance defaults so repeated runs don't accumulate state
        query_payload = dict(self.query_payload)

        if "query" in kwargs:
            query_payload["q"] = kwargs["query"]
        elif "search_query" in kwargs:
            query_payload["q"] = kwargs["search_query"]

        # build the url
        url = f"{self.search_url}{urlencode(query_payload)}"

        response = requests.get(url, headers=self.headers)
        results = response.json()
        if "results" in results:
            results = results['results']
            string = []
            for result in results:
                try:
                    string.append('\n'.join([
                        f"Title: {result['title']}",
                        f"Link: {result['link']}",
                        f"Description: {result['description'].strip()}",
                        "---"
                    ]))
                except KeyError:
                    # skip results that are missing expected fields
                    continue

            content = '\n'.join(string)
            return f"\nSearch results: {content}\n"
        else:
            return results