Merge pull request #53 from googio/adding-serply-api

Adding Serply API
João Moura
2024-07-14 13:37:03 -07:00
committed by GitHub
8 changed files with 512 additions and 2 deletions


@@ -24,10 +24,15 @@ from .tools import (
ScrapeWebsiteTool,
SeleniumScrapingTool,
SerperDevTool,
SerplyWebSearchTool,
SerplyNewsSearchTool,
SerplyScholarSearchTool,
SerplyWebpageToMarkdownTool,
SerplyJobSearchTool,
TXTSearchTool,
WebsiteSearchTool,
XMLSearchTool,
YoutubeChannelSearchTool,
YoutubeVideoSearchTool,
)
from .tools.base_tool import BaseTool, Tool, tool


@@ -25,10 +25,15 @@ from .scrape_element_from_website.scrape_element_from_website import (
from .scrape_website_tool.scrape_website_tool import ScrapeWebsiteTool
from .selenium_scraping_tool.selenium_scraping_tool import SeleniumScrapingTool
from .serper_dev_tool.serper_dev_tool import SerperDevTool
from .serply_api_tool.serply_web_search_tool import SerplyWebSearchTool
from .serply_api_tool.serply_news_search_tool import SerplyNewsSearchTool
from .serply_api_tool.serply_scholar_search_tool import SerplyScholarSearchTool
from .serply_api_tool.serply_webpage_to_markdown_tool import SerplyWebpageToMarkdownTool
from .serply_api_tool.serply_job_search_tool import SerplyJobSearchTool
from .txt_search_tool.txt_search_tool import TXTSearchTool
from .website_search.website_search_tool import WebsiteSearchTool
from .xml_search_tool.xml_search_tool import XMLSearchTool
from .youtube_channel_search_tool.youtube_channel_search_tool import (
YoutubeChannelSearchTool,
)
from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool


@@ -0,0 +1,117 @@
# Serply API Documentation
## Description
This tool is designed to perform web, news, scholar, and job searches for a specified query across the internet. It utilizes the [Serply.io](https://serply.io) API to fetch and display the most relevant search results based on the query provided by the user.
## Installation
To incorporate this tool into your project, follow the installation instructions below:
```shell
pip install 'crewai[tools]'
```
## Examples
### Web Search
The following example demonstrates how to initialize the tool and execute a web search with a given query:
```python
from crewai_tools import SerplyWebSearchTool
# Initialize the tool for internet searching capabilities
tool = SerplyWebSearchTool()
# increase search limits to 100 results
tool = SerplyWebSearchTool(limit=100)
# change results language (fr - French)
tool = SerplyWebSearchTool(hl="fr")
```
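Once initialized, the tool can be invoked directly. Below is a minimal sketch, assuming the `SERPLY_API_KEY` environment variable is set and passing the `search_query` argument defined in the tool's schema (the query itself is illustrative):
```python
# Run a web search; requires SERPLY_API_KEY in the environment
tool = SerplyWebSearchTool()
results = tool.run(search_query="latest AI agent frameworks")  # illustrative query
print(results)
```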
### News Search
The following example demonstrates how to initialize the tool and execute a news search with a given query:
```python
from crewai_tools import SerplyNewsSearchTool
# Initialize the tool for internet searching capabilities
tool = SerplyNewsSearchTool()
# change country news (JP - Japan)
tool = SerplyNewsSearchTool(proxy_location="JP")
```
### Scholar Search
The following example demonstrates how to initialize the tool and execute a scholar search with a given query:
```python
from crewai_tools import SerplyScholarSearchTool
# Initialize the tool for internet searching capabilities
tool = SerplyScholarSearchTool()
# change country news (GB - Great Britain)
tool = SerplyScholarSearchTool(proxy_location="GB")
```
### Job Search
The following example demonstrates how to initialize the tool and search for jobs in the USA:
```python
from crewai_tools import SerplyJobSearchTool
# Initialize the tool for job search capabilities
tool = SerplyJobSearchTool()
```
### Web Page To Markdown
The following example demonstrates how to initialize the tool, fetch a web page, and convert it to markdown:
```python
from crewai_tools import SerplyWebpageToMarkdownTool
# Initialize the tool for webpage-to-markdown conversion
tool = SerplyWebpageToMarkdownTool()
# change country make request from (DE - Germany)
tool = SerplyWebpageToMarkdownTool(proxy_location="DE")
```
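Invocation follows the same pattern as the other tools, passing the `url` argument defined in the tool's schema. A minimal sketch (the URL below is a placeholder):
```python
# Fetch a page and convert it to markdown; requires SERPLY_API_KEY in the environment
markdown_text = tool.run(url="https://example.com")  # placeholder URL
print(markdown_text)
```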
### Combining Multiple Tools
The following example demonstrates how to combine tools: perform a Google search to find relevant articles, then convert those articles to markdown for easier extraction of key points.
```python
from crewai import Agent
from crewai_tools import SerplyWebSearchTool, SerplyWebpageToMarkdownTool
search_tool = SerplyWebSearchTool()
convert_to_markdown = SerplyWebpageToMarkdownTool()
# Creating a senior researcher agent with memory and verbose mode
researcher = Agent(
role='Senior Researcher',
goal='Uncover groundbreaking technologies in {topic}',
verbose=True,
memory=True,
backstory=(
"Driven by curiosity, you're at the forefront of"
"innovation, eager to explore and share knowledge that could change"
"the world."
),
tools=[search_tool, convert_to_markdown],
allow_delegation=True
)
```
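To put the agent to work, it would typically be wired into a task and crew. Below is a minimal sketch that continues from the agent above, using the standard crewai `Task`/`Crew` API (the task description and topic are illustrative):
```python
from crewai import Crew, Task

# Illustrative task that exercises both tools through the researcher agent
research_task = Task(
    description="Find recent articles about {topic} and summarize their key points.",
    expected_output="A bullet-point summary of the most relevant findings.",
    agent=researcher,
)

crew = Crew(agents=[researcher], tasks=[research_task])
result = crew.kickoff(inputs={"topic": "AI agents"})  # illustrative topic
print(result)
```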
## Steps to Get Started
To effectively use the Serply tools, follow these steps:
1. **Package Installation**: Confirm that the `crewai[tools]` package is installed in your Python environment.
2. **API Key Acquisition**: Acquire a Serply API key by registering for a free account at [Serply.io](https://serply.io).
3. **Environment Configuration**: Store your obtained API key in an environment variable named `SERPLY_API_KEY` to facilitate its use by the tool.
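For example, in a Unix-like shell (the key value below is a placeholder):
```shell
export SERPLY_API_KEY="your-serply-api-key"
```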
## Conclusion
By integrating the Serply tools into Python projects, users gain the ability to conduct real-time web, news, scholar, and job searches directly from their applications. By adhering to the setup and usage guidelines provided, incorporating these tools into projects is streamlined and straightforward.


@@ -0,0 +1,75 @@
import os
import requests
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.rag.rag_tool import RagTool
class SerplyJobSearchToolSchema(BaseModel):
"""Input for Serply Scholar Search."""
search_query: str = Field(..., description="Mandatory search query you want to use to fetch jobs postings.")
class SerplyJobSearchTool(RagTool):
name: str = "Job Search"
    description: str = "A tool to perform a job search in the US with a search_query."
args_schema: Type[BaseModel] = SerplyJobSearchToolSchema
request_url: str = "https://api.serply.io/v1/job/search/"
proxy_location: Optional[str] = "US"
"""
    proxy_location: (str): Country to fetch job postings from.
        - Currently only US is supported
"""
headers: Optional[dict] = {}
def __init__(
self,
**kwargs
):
super().__init__(**kwargs)
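        # Serply authenticates via the X-API-KEY header, read from the SERPLY_API_KEY environment variable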
self.headers = {
"X-API-KEY": os.environ["SERPLY_API_KEY"],
"User-Agent": "crew-tools",
"X-Proxy-Location": self.proxy_location
}
def _run(
self,
**kwargs: Any,
) -> Any:
query_payload = {}
if "query" in kwargs:
query_payload["q"] = kwargs["query"]
elif "search_query" in kwargs:
query_payload["q"] = kwargs["search_query"]
# build the url
url = f"{self.request_url}{urlencode(query_payload)}"
response = requests.request("GET", url, headers=self.headers)
jobs = response.json().get("jobs", "")
if not jobs:
return ""
string = []
for job in jobs:
try:
string.append('\n'.join([
f"Position: {job['position']}",
f"Employer: {job['employer']}",
f"Location: {job['location']}",
f"Link: {job['link']}",
f"""Highest: {', '.join([h for h in job['highlights']])}""",
f"Is Remote: {job['is_remote']}",
f"Is Hybrid: {job['is_remote']}",
"---"
]))
except KeyError:
continue
content = '\n'.join(string)
return f"\nSearch results: {content}\n"


@@ -0,0 +1,81 @@
import os
import requests
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class SerplyNewsSearchToolSchema(BaseModel):
"""Input for Serply News Search."""
search_query: str = Field(..., description="Mandatory search query you want to use to fetch news articles")
class SerplyNewsSearchTool(BaseTool):
name: str = "News Search"
    description: str = "A tool to perform a news article search with a search_query."
args_schema: Type[BaseModel] = SerplyNewsSearchToolSchema
search_url: str = "https://api.serply.io/v1/news/"
proxy_location: Optional[str] = "US"
headers: Optional[dict] = {}
limit: Optional[int] = 10
def __init__(
self,
limit: Optional[int] = 10,
proxy_location: Optional[str] = "US",
**kwargs
):
"""
param: limit (int): The maximum number of results to return [10-100, defaults to 10]
        proxy_location: (str): Where to get news from, for country-specific results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
"""
super().__init__(**kwargs)
self.limit = limit
self.proxy_location = proxy_location
self.headers = {
"X-API-KEY": os.environ["SERPLY_API_KEY"],
"User-Agent": "crew-tools",
"X-Proxy-Location": proxy_location
}
def _run(
self,
**kwargs: Any,
) -> Any:
# build query parameters
query_payload = {}
if "query" in kwargs:
query_payload["q"] = kwargs["query"]
elif "search_query" in kwargs:
query_payload["q"] = kwargs["search_query"]
# build the url
url = f"{self.search_url}{urlencode(query_payload)}"
response = requests.request("GET", url, headers=self.headers)
results = response.json()
if "entries" in results:
results = results['entries']
string = []
for result in results[:self.limit]:
try:
                    # follow redirects to resolve the final article URL
r = requests.get(result['link'])
final_link = r.history[-1].headers['Location']
string.append('\n'.join([
f"Title: {result['title']}",
f"Link: {final_link}",
f"Source: {result['source']['title']}",
f"Published: {result['published']}",
"---"
]))
except KeyError:
continue
content = '\n'.join(string)
return f"\nSearch results: {content}\n"
else:
return results


@@ -0,0 +1,86 @@
import os
import requests
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class SerplyScholarSearchToolSchema(BaseModel):
"""Input for Serply Scholar Search."""
search_query: str = Field(..., description="Mandatory search query you want to use to fetch scholarly literature")
class SerplyScholarSearchTool(BaseTool):
name: str = "Scholar Search"
    description: str = "A tool to perform a scholarly literature search with a search_query."
args_schema: Type[BaseModel] = SerplyScholarSearchToolSchema
search_url: str = "https://api.serply.io/v1/scholar/"
hl: Optional[str] = "us"
proxy_location: Optional[str] = "US"
headers: Optional[dict] = {}
def __init__(
self,
hl: str = "us",
proxy_location: Optional[str] = "US",
**kwargs
):
"""
param: hl (str): host Language code to display results in
(reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
        proxy_location: (str): Where to get scholarly literature from, for country-specific results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
"""
super().__init__(**kwargs)
self.hl = hl
self.proxy_location = proxy_location
self.headers = {
"X-API-KEY": os.environ["SERPLY_API_KEY"],
"User-Agent": "crew-tools",
"X-Proxy-Location": proxy_location
}
def _run(
self,
**kwargs: Any,
) -> Any:
query_payload = {
"hl": self.hl
}
if "query" in kwargs:
query_payload["q"] = kwargs["query"]
elif "search_query" in kwargs:
query_payload["q"] = kwargs["search_query"]
# build the url
url = f"{self.search_url}{urlencode(query_payload)}"
response = requests.request("GET", url, headers=self.headers)
articles = response.json().get("articles", "")
if not articles:
return ""
string = []
for article in articles:
try:
if "doc" in article:
link = article['doc']['link']
else:
link = article['link']
authors = [author['name'] for author in article['author']['authors']]
string.append('\n'.join([
f"Title: {article['title']}",
f"Link: {link}",
f"Description: {article['description']}",
f"Cite: {article['cite']}",
f"Authors: {', '.join(authors)}",
"---"
]))
except KeyError:
continue
content = '\n'.join(string)
return f"\nSearch results: {content}\n"


@@ -0,0 +1,93 @@
import os
import requests
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class SerplyWebSearchToolSchema(BaseModel):
"""Input for Serply Web Search."""
    search_query: str = Field(..., description="Mandatory search query you want to use for Google search")
class SerplyWebSearchTool(BaseTool):
name: str = "Google Search"
    description: str = "A tool to perform a Google search with a search_query."
args_schema: Type[BaseModel] = SerplyWebSearchToolSchema
search_url: str = "https://api.serply.io/v1/search/"
hl: Optional[str] = "us"
limit: Optional[int] = 10
device_type: Optional[str] = "desktop"
proxy_location: Optional[str] = "US"
query_payload: Optional[dict] = {}
headers: Optional[dict] = {}
def __init__(
self,
hl: str = "us",
limit: int = 10,
device_type: str = "desktop",
proxy_location: str = "US",
**kwargs
):
"""
param: hl (str): host Language code to display results in
(reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
param: limit (int): The maximum number of results to return [10-100, defaults to 10]
param: device_type (str): desktop/mobile results (defaults to desktop)
proxy_location: (str): Where to perform the search, specifically for local/regional results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
"""
super().__init__(**kwargs)
self.limit = limit
self.device_type = device_type
self.proxy_location = proxy_location
# build query parameters
self.query_payload = {
"num": limit,
"gl": proxy_location.upper(),
"hl": hl.lower()
}
self.headers = {
"X-API-KEY": os.environ["SERPLY_API_KEY"],
"X-User-Agent": device_type,
"User-Agent": "crew-tools",
"X-Proxy-Location": proxy_location
}
def _run(
self,
**kwargs: Any,
) -> Any:
if "query" in kwargs:
self.query_payload["q"] = kwargs["query"]
elif "search_query" in kwargs:
self.query_payload["q"] = kwargs["search_query"]
# build the url
url = f"{self.search_url}{urlencode(self.query_payload)}"
response = requests.request("GET", url, headers=self.headers)
results = response.json()
if "results" in results:
results = results['results']
string = []
for result in results:
try:
string.append('\n'.join([
f"Title: {result['title']}",
f"Link: {result['link']}",
f"Description: {result['description'].strip()}",
"---"
]))
except KeyError:
continue
content = '\n'.join(string)
return f"\nSearch results: {content}\n"
else:
return results


@@ -0,0 +1,48 @@
import os
import requests
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.rag.rag_tool import RagTool
class SerplyWebpageToMarkdownToolSchema(BaseModel):
"""Input for Serply Scholar Search."""
url: str = Field(..., description="Mandatory url you want to use to fetch and convert to markdown")
class SerplyWebpageToMarkdownTool(RagTool):
name: str = "Webpage to Markdown"
    description: str = "A tool to convert a webpage to markdown, making it easier for LLMs to understand"
args_schema: Type[BaseModel] = SerplyWebpageToMarkdownToolSchema
request_url: str = "https://api.serply.io/v1/request"
proxy_location: Optional[str] = "US"
headers: Optional[dict] = {}
def __init__(
self,
proxy_location: Optional[str] = "US",
**kwargs
):
"""
        proxy_location: (str): Country to route the request through, for country-specific results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
"""
super().__init__(**kwargs)
self.proxy_location = proxy_location
self.headers = {
"X-API-KEY": os.environ["SERPLY_API_KEY"],
"User-Agent": "crew-tools",
"X-Proxy-Location": proxy_location
}
def _run(
self,
**kwargs: Any,
) -> Any:
data = {
"url": kwargs["url"],
"method": "GET",
"response_type": "markdown"
}
response = requests.request("POST", self.request_url, headers=self.headers, json=data)
return response.text