From ffe3829ceff593cfbe2f398bf12051b9ca39e80c Mon Sep 17 00:00:00 2001 From: teampen <136991215+teampen@users.noreply.github.com> Date: Mon, 10 Jun 2024 21:34:53 -0400 Subject: [PATCH] adding webpage to markdown --- src/crewai_tools/__init__.py | 3 +- src/crewai_tools/tools/__init__.py | 1 + .../tools/serply_api_tool/README.md | 39 +++++++++++++++ .../serply_news_search_tool.py | 1 + .../serply_scholar_search_tool.py | 5 +- .../serply_web_to_markdown_tool.py | 49 +++++++++++++++++++ 6 files changed, 95 insertions(+), 3 deletions(-) create mode 100644 src/crewai_tools/tools/serply_api_tool/serply_web_to_markdown_tool.py diff --git a/src/crewai_tools/__init__.py b/src/crewai_tools/__init__.py index a9013b7ee..beb228936 100644 --- a/src/crewai_tools/__init__.py +++ b/src/crewai_tools/__init__.py @@ -25,5 +25,6 @@ from .tools import ( YoutubeVideoSearchTool, SerplyWebSearchTool, SerplyNewsSearchTool, - SerplyScholarSearchTool + SerplyScholarSearchTool, + SerplyWebpageToMarkdownTool ) \ No newline at end of file diff --git a/src/crewai_tools/tools/__init__.py b/src/crewai_tools/tools/__init__.py index 138dbce17..11ceebfaa 100644 --- a/src/crewai_tools/tools/__init__.py +++ b/src/crewai_tools/tools/__init__.py @@ -24,3 +24,4 @@ from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSea from .serply_api_tool.serply_web_search_tool import SerplyWebSearchTool from .serply_api_tool.serply_news_search_tool import SerplyNewsSearchTool from .serply_api_tool.serply_scholar_search_tool import SerplyScholarSearchTool +from .serply_api_tool.serply_web_to_markdown_tool import SerplyWebpageToMarkdownTool diff --git a/src/crewai_tools/tools/serply_api_tool/README.md b/src/crewai_tools/tools/serply_api_tool/README.md index fe439b28f..22292bcf7 100644 --- a/src/crewai_tools/tools/serply_api_tool/README.md +++ b/src/crewai_tools/tools/serply_api_tool/README.md @@ -55,6 +55,45 @@ tool = SerplyScholarSearchTool() tool = SerplyScholarSearchTool(proxy_location="GB") ``` 
+## Web Page To Markdown +The following example demonstrates how to initialize the tool and fetch a web page and convert it to markdown: + +```python +from crewai_tools import SerplyWebpageToMarkdownTool + +# Initialize the tool for converting a web page to markdown +tool = SerplyWebpageToMarkdownTool() + +# change the proxy location used to fetch the page (DE - Germany) +tool = SerplyWebpageToMarkdownTool(proxy_location="DE") +``` + +## Combining Multiple Tools + +The following example demonstrates performing a Google search to find relevant articles. Then, convert those articles to markdown format for easier extraction of key points. + +```python +from crewai import Agent +from crewai_tools import SerplyWebSearchTool, SerplyWebpageToMarkdownTool + +search_tool = SerplyWebSearchTool() +convert_to_markdown = SerplyWebpageToMarkdownTool() + +# Creating a senior researcher agent with memory and verbose mode +researcher = Agent( + role='Senior Researcher', + goal='Uncover groundbreaking technologies in {topic}', + verbose=True, + memory=True, + backstory=( + "Driven by curiosity, you're at the forefront of" + "innovation, eager to explore and share knowledge that could change" + "the world."
+ ), + tools=[search_tool, convert_to_markdown], + allow_delegation=True +) +``` ## Steps to Get Started To effectively use the `SerplyApiTool`, follow these steps: diff --git a/src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py b/src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py index c1fef5a77..40b1415b7 100644 --- a/src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py +++ b/src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py @@ -5,6 +5,7 @@ from typing import Type, Any, Optional from pydantic.v1 import BaseModel, Field from crewai_tools.tools.base_tool import BaseTool + class SerplyNewsSearchToolSchema(BaseModel): """Input for Serply News Search.""" search_query: str = Field(..., description="Mandatory search query you want to use to fetch news articles") diff --git a/src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py b/src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py index badc9950e..dc7449353 100644 --- a/src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py +++ b/src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py @@ -5,6 +5,7 @@ from typing import Type, Any, Optional from pydantic.v1 import BaseModel, Field from crewai_tools.tools.base_tool import BaseTool + class SerplyScholarSearchToolSchema(BaseModel): """Input for Serply Scholar Search.""" search_query: str = Field(..., description="Mandatory search query you want to use to fetch scholarly literature") @@ -41,8 +42,8 @@ class SerplyScholarSearchTool(BaseTool): } def _run( - self, - **kwargs: Any, + self, + **kwargs: Any, ) -> Any: query_payload = { "hl": self.hl diff --git a/src/crewai_tools/tools/serply_api_tool/serply_web_to_markdown_tool.py b/src/crewai_tools/tools/serply_api_tool/serply_web_to_markdown_tool.py new file mode 100644 index 000000000..36a42a48f --- /dev/null +++ b/src/crewai_tools/tools/serply_api_tool/serply_web_to_markdown_tool.py @@ -0,0 +1,49 @@ +import os 
+import requests +from urllib.parse import urlencode +from typing import Type, Any, Optional +from pydantic.v1 import BaseModel, Field +from crewai_tools.tools.rag.rag_tool import RagTool + + +class SerplyWebpageToMarkdownToolSchema(BaseModel): +    """Input for Serply Web Page To Markdown.""" +    url: str = Field(..., description="Mandatory url you want to use to fetch and convert to markdown") + + +class SerplyWebpageToMarkdownTool(RagTool): +    name: str = "Webpage to Markdown" +    description: str = "A tool to convert a webpage to markdown to make it easier for LLMs to understand" +    args_schema: Type[BaseModel] = SerplyWebpageToMarkdownToolSchema +    request_url: str = "https://api.serply.io/v1/request" +    proxy_location: Optional[str] = "US" +    headers: Optional[dict] = {} + +    def __init__( +            self, +            proxy_location: Optional[str] = "US", +            **kwargs +    ): +        """ +            proxy_location: (str): Where to fetch the webpage from, for country-specific results. +                ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US) +        """ +        super().__init__(**kwargs) +        self.proxy_location = proxy_location +        self.headers = { +            "X-API-KEY": os.environ["SERPLY_API_KEY"], +            "User-Agent": "crew-tools", +            "X-Proxy-Location": proxy_location +        } + +    def _run( +            self, +            **kwargs: Any, +    ) -> Any: +        data = { +            "url": kwargs["url"], +            "method": "get", +            "response_type": "markdown" +        } +        response = requests.request("POST", self.request_url, headers=self.headers, json=data) +        return response.text