diff --git a/src/crewai_tools/__init__.py b/src/crewai_tools/__init__.py
index aaca3396e..d9b1fa753 100644
--- a/src/crewai_tools/__init__.py
+++ b/src/crewai_tools/__init__.py
@@ -24,10 +24,15 @@ from .tools import (
     ScrapeWebsiteTool,
     SeleniumScrapingTool,
     SerperDevTool,
+    SerplyWebSearchTool,
+    SerplyNewsSearchTool,
+    SerplyScholarSearchTool,
+    SerplyWebpageToMarkdownTool,
+    SerplyJobSearchTool,
     TXTSearchTool,
     WebsiteSearchTool,
     XMLSearchTool,
     YoutubeChannelSearchTool,
     YoutubeVideoSearchTool,
 )
 from .tools.base_tool import BaseTool, Tool, tool
diff --git a/src/crewai_tools/tools/__init__.py b/src/crewai_tools/tools/__init__.py
index c9ddb3444..26f0e9d4b 100644
--- a/src/crewai_tools/tools/__init__.py
+++ b/src/crewai_tools/tools/__init__.py
@@ -25,10 +25,15 @@ from .scrape_element_from_website.scrape_element_from_website import (
 from .scrape_website_tool.scrape_website_tool import ScrapeWebsiteTool
 from .selenium_scraping_tool.selenium_scraping_tool import SeleniumScrapingTool
 from .serper_dev_tool.serper_dev_tool import SerperDevTool
+from .serply_api_tool.serply_web_search_tool import SerplyWebSearchTool
+from .serply_api_tool.serply_news_search_tool import SerplyNewsSearchTool
+from .serply_api_tool.serply_scholar_search_tool import SerplyScholarSearchTool
+from .serply_api_tool.serply_webpage_to_markdown_tool import SerplyWebpageToMarkdownTool
+from .serply_api_tool.serply_job_search_tool import SerplyJobSearchTool
 from .txt_search_tool.txt_search_tool import TXTSearchTool
 from .website_search.website_search_tool import WebsiteSearchTool
 from .xml_search_tool.xml_search_tool import XMLSearchTool
 from .youtube_channel_search_tool.youtube_channel_search_tool import (
     YoutubeChannelSearchTool,
 )
 from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool
diff --git a/src/crewai_tools/tools/serply_api_tool/README.md b/src/crewai_tools/tools/serply_api_tool/README.md
new file mode 100644
index 000000000..5c6b9395e
--- /dev/null
+++ b/src/crewai_tools/tools/serply_api_tool/README.md
@@ -0,0 +1,117 @@
+# Serply API Documentation
+
+## Description
+This tool suite is designed to perform web, news, scholar, and job searches for a specified query across the internet. It utilizes the [Serply.io](https://serply.io) API to fetch and display the most relevant search results for the query provided by the user.
+
+## Installation
+
+To incorporate this tool into your project, follow the installation instructions below:
+```shell
+pip install 'crewai[tools]'
+```
+
+## Examples
+
+## Web Search
+The following example demonstrates how to initialize the tool and run a web search with a given query:
+
+```python
+from crewai_tools import SerplyWebSearchTool
+
+# Initialize the tool for internet searching capabilities
+tool = SerplyWebSearchTool()
+
+# increase the search limit to 100 results
+tool = SerplyWebSearchTool(limit=100)
+
+# change the results language (fr - French)
+tool = SerplyWebSearchTool(hl="fr")
+```
+
+## News Search
+The following example demonstrates how to initialize the tool and run a news search with a given query:
+
+```python
+from crewai_tools import SerplyNewsSearchTool
+
+# Initialize the tool for news searching capabilities
+tool = SerplyNewsSearchTool()
+
+# change the news country (JP - Japan)
+tool = SerplyNewsSearchTool(proxy_location="JP")
+```
+
+## Scholar Search
+The following example demonstrates how to initialize the tool and search for scholarly articles with a given query:
+
+```python
+from crewai_tools import SerplyScholarSearchTool
+
+# Initialize the tool for scholarly searching capabilities
+tool = SerplyScholarSearchTool()
+
+# change the country (GB - Great Britain)
+tool = SerplyScholarSearchTool(proxy_location="GB")
+```
+
+## Job Search
+The following example demonstrates how to initialize the tool and search for jobs in the US:
+
+```python
+from crewai_tools import SerplyJobSearchTool
+
+# Initialize the tool for job searching capabilities
+tool = SerplyJobSearchTool()
+```
+
+## Web Page To Markdown
+The following example demonstrates how to initialize the tool, fetch a web page, and convert it to markdown:
+
+```python
+from crewai_tools import SerplyWebpageToMarkdownTool
+
+# Initialize the tool for fetching and converting web pages
+tool = SerplyWebpageToMarkdownTool()
+
+# change the country requests are made from (DE - Germany)
+tool = SerplyWebpageToMarkdownTool(proxy_location="DE")
+```
+
+## Combining Multiple Tools
+
+The following example demonstrates how to perform a Google search to find relevant articles, then convert those articles to markdown for easier extraction of key points:
+
+```python
+from crewai import Agent
+from crewai_tools import SerplyWebSearchTool, SerplyWebpageToMarkdownTool
+
+search_tool = SerplyWebSearchTool()
+convert_to_markdown = SerplyWebpageToMarkdownTool()
+
+# Creating a senior researcher agent with memory and verbose mode
+researcher = Agent(
+    role='Senior Researcher',
+    goal='Uncover groundbreaking technologies in {topic}',
+    verbose=True,
+    memory=True,
+    backstory=(
+        "Driven by curiosity, you're at the forefront of "
+        "innovation, eager to explore and share knowledge that could change "
+        "the world."
+    ),
+    tools=[search_tool, convert_to_markdown],
+    allow_delegation=True
+)
+```
+
+## Steps to Get Started
+To effectively use the Serply tools, follow these steps:
+
+1. **Package Installation**: Confirm that the `crewai[tools]` package is installed in your Python environment.
+2. **API Key Acquisition**: Acquire a Serply API key by registering for a free account at [Serply.io](https://serply.io).
+3. **Environment Configuration**: Store your API key in an environment variable named `SERPLY_API_KEY` so the tools can use it.
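+
+Once the key is configured, you can smoke-test any of the tools by calling it directly. Below is a minimal sketch; the query string and the in-process key fallback are illustrative placeholders, not values required by the API:
+
+```python
+import os
+from crewai_tools import SerplyWebSearchTool
+
+# Assumes SERPLY_API_KEY is already exported in your shell; the fallback
+# value below is a placeholder for local experimentation only.
+os.environ.setdefault("SERPLY_API_KEY", "your-api-key")
+
+tool = SerplyWebSearchTool()
+
+# run() forwards keyword arguments to the tool's _run() implementation
+print(tool.run(search_query="latest advancements in AI"))
+```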
+
+## Conclusion
+By integrating the Serply tools into Python projects, users gain the ability to conduct real-time web, news, scholar, and job searches directly from their applications. By adhering to the setup and usage guidelines provided, incorporating these tools into projects is streamlined and straightforward.
diff --git a/src/crewai_tools/tools/serply_api_tool/serply_job_search_tool.py b/src/crewai_tools/tools/serply_api_tool/serply_job_search_tool.py
new file mode 100644
index 000000000..358e312c7
--- /dev/null
+++ b/src/crewai_tools/tools/serply_api_tool/serply_job_search_tool.py
@@ -0,0 +1,75 @@
+import os
+import requests
+from urllib.parse import urlencode
+from typing import Type, Any, Optional
+from pydantic.v1 import BaseModel, Field
+from crewai_tools.tools.rag.rag_tool import RagTool
+
+
+class SerplyJobSearchToolSchema(BaseModel):
+    """Input for Serply Job Search."""
+    search_query: str = Field(..., description="Mandatory search query you want to use to fetch job postings.")
+
+
+class SerplyJobSearchTool(RagTool):
+    name: str = "Job Search"
+    description: str = "A tool to perform a job search in the US with a search_query."
+    args_schema: Type[BaseModel] = SerplyJobSearchToolSchema
+    request_url: str = "https://api.serply.io/v1/job/search/"
+    proxy_location: Optional[str] = "US"
+    """
+    proxy_location (str): Country to fetch job postings for.
+    - Currently only the US is supported
+    """
+    headers: Optional[dict] = {}
+
+    def __init__(
+        self,
+        **kwargs
+    ):
+        super().__init__(**kwargs)
+        self.headers = {
+            "X-API-KEY": os.environ["SERPLY_API_KEY"],
+            "User-Agent": "crew-tools",
+            "X-Proxy-Location": self.proxy_location
+        }
+
+    def _run(
+        self,
+        **kwargs: Any,
+    ) -> Any:
+        # build query parameters
+        query_payload = {}
+
+        if "query" in kwargs:
+            query_payload["q"] = kwargs["query"]
+        elif "search_query" in kwargs:
+            query_payload["q"] = kwargs["search_query"]
+
+        # build the url
+        url = f"{self.request_url}{urlencode(query_payload)}"
+
+        response = requests.request("GET", url, headers=self.headers)
+
+        jobs = response.json().get("jobs", "")
+
+        if not jobs:
+            return ""
+
+        string = []
+        for job in jobs:
+            try:
+                string.append('\n'.join([
+                    f"Position: {job['position']}",
+                    f"Employer: {job['employer']}",
+                    f"Location: {job['location']}",
+                    f"Link: {job['link']}",
+                    f"Highlights: {', '.join(job['highlights'])}",
+                    f"Is Remote: {job['is_remote']}",
+                    f"Is Hybrid: {job['is_hybrid']}",
+                    "---"
+                ]))
+            except KeyError:
+                continue
+
+        content = '\n'.join(string)
+        return f"\nSearch results: {content}\n"
diff --git a/src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py b/src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py
new file mode 100644
index 000000000..f1127246e
--- /dev/null
+++ b/src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py
@@ -0,0 +1,81 @@
+import os
+import requests
+from urllib.parse import urlencode
+from typing import Type, Any, Optional
+from pydantic.v1 import BaseModel, Field
+from crewai_tools.tools.base_tool import BaseTool
+
+
+class SerplyNewsSearchToolSchema(BaseModel):
+    """Input for Serply News Search."""
+    search_query: str = Field(..., description="Mandatory search query you want to use to fetch news articles")
+
+
+class SerplyNewsSearchTool(BaseTool):
+    name: str = "News Search"
+    description: str = "A tool to perform a news article search with a search_query."
+    args_schema: Type[BaseModel] = SerplyNewsSearchToolSchema
+    search_url: str = "https://api.serply.io/v1/news/"
+    proxy_location: Optional[str] = "US"
+    headers: Optional[dict] = {}
+    limit: Optional[int] = 10
+
+    def __init__(
+        self,
+        limit: Optional[int] = 10,
+        proxy_location: Optional[str] = "US",
+        **kwargs
+    ):
+        """
+        param: limit (int): The maximum number of results to return [10-100, defaults to 10]
+        param: proxy_location (str): Country to fetch news for, for country-specific results.
+            ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
+        """
+        super().__init__(**kwargs)
+        self.limit = limit
+        self.proxy_location = proxy_location
+        self.headers = {
+            "X-API-KEY": os.environ["SERPLY_API_KEY"],
+            "User-Agent": "crew-tools",
+            "X-Proxy-Location": proxy_location
+        }
+
+    def _run(
+        self,
+        **kwargs: Any,
+    ) -> Any:
+        # build query parameters
+        query_payload = {}
+
+        if "query" in kwargs:
+            query_payload["q"] = kwargs["query"]
+        elif "search_query" in kwargs:
+            query_payload["q"] = kwargs["search_query"]
+
+        # build the url
+        url = f"{self.search_url}{urlencode(query_payload)}"
+
+        response = requests.request("GET", url, headers=self.headers)
+        results = response.json()
+        if "entries" in results:
+            results = results['entries']
+            string = []
+            for result in results[:self.limit]:
+                try:
+                    # follow the redirect to resolve the final article URL
+                    r = requests.get(result['link'])
+                    final_link = r.history[-1].headers['Location']
+                    string.append('\n'.join([
+                        f"Title: {result['title']}",
+                        f"Link: {final_link}",
+                        f"Source: {result['source']['title']}",
+                        f"Published: {result['published']}",
+                        "---"
+                    ]))
+                except KeyError:
+                    continue
+
+            content = '\n'.join(string)
+            return f"\nSearch results: {content}\n"
+        else:
+            return results
diff --git a/src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py b/src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py
new file mode 100644
index 000000000..62c3bef7f
--- /dev/null
+++ b/src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py
@@ -0,0 +1,86 @@
+import os
+import requests
+from urllib.parse import urlencode
+from typing import Type, Any, Optional
+from pydantic.v1 import BaseModel, Field
+from crewai_tools.tools.base_tool import BaseTool
+
+
+class SerplyScholarSearchToolSchema(BaseModel):
+    """Input for Serply Scholar Search."""
+    search_query: str = Field(..., description="Mandatory search query you want to use to fetch scholarly literature")
+
+
+class SerplyScholarSearchTool(BaseTool):
+    name: str = "Scholar Search"
+    description: str = "A tool to perform a scholarly literature search with a search_query."
+    args_schema: Type[BaseModel] = SerplyScholarSearchToolSchema
+    search_url: str = "https://api.serply.io/v1/scholar/"
+    hl: Optional[str] = "us"
+    proxy_location: Optional[str] = "US"
+    headers: Optional[dict] = {}
+
+    def __init__(
+        self,
+        hl: str = "us",
+        proxy_location: Optional[str] = "US",
+        **kwargs
+    ):
+        """
+        param: hl (str): host language code to display results in
+            (reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
+        param: proxy_location (str): Country to fetch scholarly literature for, for country-specific results.
+            ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
+        """
+        super().__init__(**kwargs)
+        self.hl = hl
+        self.proxy_location = proxy_location
+        self.headers = {
+            "X-API-KEY": os.environ["SERPLY_API_KEY"],
+            "User-Agent": "crew-tools",
+            "X-Proxy-Location": proxy_location
+        }
+
+    def _run(
+        self,
+        **kwargs: Any,
+    ) -> Any:
+        # build query parameters
+        query_payload = {
+            "hl": self.hl
+        }
+
+        if "query" in kwargs:
+            query_payload["q"] = kwargs["query"]
+        elif "search_query" in kwargs:
+            query_payload["q"] = kwargs["search_query"]
+
+        # build the url
+        url = f"{self.search_url}{urlencode(query_payload)}"
+
+        response = requests.request("GET", url, headers=self.headers)
+        articles = response.json().get("articles", "")
+
+        if not articles:
+            return ""
+
+        string = []
+        for article in articles:
+            try:
+                if "doc" in article:
+                    link = article['doc']['link']
+                else:
+                    link = article['link']
+                authors = [author['name'] for author in article['author']['authors']]
+                string.append('\n'.join([
+                    f"Title: {article['title']}",
+                    f"Link: {link}",
+                    f"Description: {article['description']}",
+                    f"Cite: {article['cite']}",
+                    f"Authors: {', '.join(authors)}",
+                    "---"
+                ]))
+            except KeyError:
+                continue
+
+        content = '\n'.join(string)
+        return f"\nSearch results: {content}\n"
diff --git a/src/crewai_tools/tools/serply_api_tool/serply_web_search_tool.py b/src/crewai_tools/tools/serply_api_tool/serply_web_search_tool.py
new file mode 100644
index 000000000..894c24741
--- /dev/null
+++ b/src/crewai_tools/tools/serply_api_tool/serply_web_search_tool.py
@@ -0,0 +1,93 @@
+import os
+import requests
+from urllib.parse import urlencode
+from typing import Type, Any, Optional
+from pydantic.v1 import BaseModel, Field
+from crewai_tools.tools.base_tool import BaseTool
+
+
+class SerplyWebSearchToolSchema(BaseModel):
+    """Input for Serply Web Search."""
+    search_query: str = Field(..., description="Mandatory search query you want to use for a Google search")
+
+
+class SerplyWebSearchTool(BaseTool):
+    name: str = "Google Search"
+    description: str = "A tool to perform a Google search with a search_query."
+    args_schema: Type[BaseModel] = SerplyWebSearchToolSchema
+    search_url: str = "https://api.serply.io/v1/search/"
+    hl: Optional[str] = "us"
+    limit: Optional[int] = 10
+    device_type: Optional[str] = "desktop"
+    proxy_location: Optional[str] = "US"
+    query_payload: Optional[dict] = {}
+    headers: Optional[dict] = {}
+
+    def __init__(
+        self,
+        hl: str = "us",
+        limit: int = 10,
+        device_type: str = "desktop",
+        proxy_location: str = "US",
+        **kwargs
+    ):
+        """
+        param: hl (str): host language code to display results in
+            (reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
+        param: limit (int): The maximum number of results to return [10-100, defaults to 10]
+        param: device_type (str): desktop/mobile results (defaults to desktop)
+        param: proxy_location (str): Where to perform the search, for local/regional results.
+            ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
+        """
+        super().__init__(**kwargs)
+
+        self.limit = limit
+        self.device_type = device_type
+        self.proxy_location = proxy_location
+
+        # build query parameters
+        self.query_payload = {
+            "num": limit,
+            "gl": proxy_location.upper(),
+            "hl": hl.lower()
+        }
+        self.headers = {
+            "X-API-KEY": os.environ["SERPLY_API_KEY"],
+            "X-User-Agent": device_type,
+            "User-Agent": "crew-tools",
+            "X-Proxy-Location": proxy_location
+        }
+
+    def _run(
+        self,
+        **kwargs: Any,
+    ) -> Any:
+        if "query" in kwargs:
+            self.query_payload["q"] = kwargs["query"]
+        elif "search_query" in kwargs:
+            self.query_payload["q"] = kwargs["search_query"]
+
+        # build the url
+        url = f"{self.search_url}{urlencode(self.query_payload)}"
+
+        response = requests.request("GET", url, headers=self.headers)
+        results = response.json()
+        if "results" in results:
+            results = results['results']
+            string = []
+            for result in results:
+                try:
+                    string.append('\n'.join([
+                        f"Title: {result['title']}",
+                        f"Link: {result['link']}",
+                        f"Description: {result['description'].strip()}",
+                        "---"
+                    ]))
+                except KeyError:
+                    continue
+
+            content = '\n'.join(string)
+            return f"\nSearch results: {content}\n"
+        else:
+            return results
diff --git a/src/crewai_tools/tools/serply_api_tool/serply_webpage_to_markdown_tool.py b/src/crewai_tools/tools/serply_api_tool/serply_webpage_to_markdown_tool.py
new file mode 100644
index 000000000..27ffc54ce
--- /dev/null
+++ b/src/crewai_tools/tools/serply_api_tool/serply_webpage_to_markdown_tool.py
@@ -0,0 +1,48 @@
+import os
+import requests
+from typing import Type, Any, Optional
+from pydantic.v1 import BaseModel, Field
+from crewai_tools.tools.rag.rag_tool import RagTool
+
+
+class SerplyWebpageToMarkdownToolSchema(BaseModel):
+    """Input for Serply Webpage To Markdown."""
+    url: str = Field(..., description="Mandatory url you want to fetch and convert to markdown")
+
+
+class SerplyWebpageToMarkdownTool(RagTool):
+    name: str = "Webpage to Markdown"
+    description: str = "A tool to convert a webpage to markdown to make it easier for LLMs to understand"
+    args_schema: Type[BaseModel] = SerplyWebpageToMarkdownToolSchema
+    request_url: str = "https://api.serply.io/v1/request"
+    proxy_location: Optional[str] = "US"
+    headers: Optional[dict] = {}
+
+    def __init__(
+        self,
+        proxy_location: Optional[str] = "US",
+        **kwargs
+    ):
+        """
+        param: proxy_location (str): Country to make the request from, for country-specific results.
+            ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
+        """
+        super().__init__(**kwargs)
+        self.proxy_location = proxy_location
+        self.headers = {
+            "X-API-KEY": os.environ["SERPLY_API_KEY"],
+            "User-Agent": "crew-tools",
+            "X-Proxy-Location": proxy_location
+        }
+
+    def _run(
+        self,
+        **kwargs: Any,
+    ) -> Any:
+        # proxy the page fetch through Serply and request a markdown rendering
+        data = {
+            "url": kwargs["url"],
+            "method": "GET",
+            "response_type": "markdown"
+        }
+        response = requests.request("POST", self.request_url, headers=self.headers, json=data)
+        return response.text