adding webpage to markdown

This commit is contained in:
teampen
2024-06-10 21:34:53 -04:00
parent d8b8edab08
commit ffe3829cef
6 changed files with 95 additions and 3 deletions

View File

@@ -25,5 +25,6 @@ from .tools import (
YoutubeVideoSearchTool,
SerplyWebSearchTool,
SerplyNewsSearchTool,
SerplyScholarSearchTool
SerplyScholarSearchTool,
SerplyWebpageToMarkdownTool
)

View File

@@ -24,3 +24,4 @@ from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSea
from .serply_api_tool.serply_web_search_tool import SerplyWebSearchTool
from .serply_api_tool.serply_news_search_tool import SerplyNewsSearchTool
from .serply_api_tool.serply_scholar_search_tool import SerplyScholarSearchTool
from .serply_api_tool.serply_web_to_markdown_tool import SerplyWebpageToMarkdownTool

View File

@@ -55,6 +55,45 @@ tool = SerplyScholarSearchTool()
tool = SerplyScholarSearchTool(proxy_location="GB")
```
## Web Page To Markdown
The following example demonstrates how to initialize the tool and fetch a web page and convert it to markdown:
```python
from crewai_tools import SerplyWebpageToMarkdownTool
# Initialize the tool for converting web pages to markdown
tool = SerplyWebpageToMarkdownTool()
# change proxy location to Germany (DE)
tool = SerplyWebpageToMarkdownTool(proxy_location="DE")
```
## Combining Multiple Tools
The following example demonstrates performing a Google search to find relevant articles. Then, convert those articles to markdown format for easier extraction of key points.
```python
from crewai import Agent
from crewai_tools import SerplyWebSearchTool, SerplyWebpageToMarkdownTool
search_tool = SerplyWebSearchTool()
convert_to_markdown = SerplyWebpageToMarkdownTool()
# Creating a senior researcher agent with memory and verbose mode
researcher = Agent(
role='Senior Researcher',
goal='Uncover groundbreaking technologies in {topic}',
verbose=True,
memory=True,
backstory=(
"Driven by curiosity, you're at the forefront of"
"innovation, eager to explore and share knowledge that could change"
"the world."
),
tools=[search_tool, convert_to_markdown],
allow_delegation=True
)
```
## Steps to Get Started
To effectively use the `SerplyApiTool`, follow these steps:

View File

@@ -5,6 +5,7 @@ from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class SerplyNewsSearchToolSchema(BaseModel):
"""Input for Serply News Search."""
search_query: str = Field(..., description="Mandatory search query you want to use to fetch news articles")

View File

@@ -5,6 +5,7 @@ from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class SerplyScholarSearchToolSchema(BaseModel):
"""Input for Serply Scholar Search."""
search_query: str = Field(..., description="Mandatory search query you want to use to fetch scholarly literature")
@@ -41,8 +42,8 @@ class SerplyScholarSearchTool(BaseTool):
}
def _run(
self,
**kwargs: Any,
self,
**kwargs: Any,
) -> Any:
query_payload = {
"hl": self.hl

View File

@@ -0,0 +1,49 @@
import os
import requests
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.rag.rag_tool import RagTool
class SerplyWebpageToMarkdownToolSchema(BaseModel):
    """Input for Serply Webpage To Markdown."""
    url: str = Field(..., description="Mandatory url you want to use to fetch and convert to markdown")
class SerplyWebpageToMarkdownTool(RagTool):
    """Fetch a web page through the Serply request API and return it as markdown.

    Requires the ``SERPLY_API_KEY`` environment variable to be set; reading it
    is deferred to ``__init__`` so the module can be imported without a key.
    """

    name: str = "Webpage to Markdown"
    # Fixed grammar — original read "A tool to perform convert a webpage ...".
    description: str = "A tool to convert a webpage to markdown to make it easier for LLMs to understand"
    args_schema: Type[BaseModel] = SerplyWebpageToMarkdownToolSchema
    # Serply endpoint that proxies the fetch and performs the conversion.
    request_url: str = "https://api.serply.io/v1/request"
    proxy_location: Optional[str] = "US"
    headers: Optional[dict] = {}

    def __init__(
        self,
        proxy_location: Optional[str] = "US",
        **kwargs,
    ):
        """
        proxy_location: (str): Which country to proxy the page fetch through.
            ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
        """
        super().__init__(**kwargs)
        self.proxy_location = proxy_location
        # Raises KeyError if SERPLY_API_KEY is not set — fail fast at construction.
        self.headers = {
            "X-API-KEY": os.environ["SERPLY_API_KEY"],
            "User-Agent": "crew-tools",
            "X-Proxy-Location": proxy_location,
        }

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        """Fetch ``kwargs['url']`` and return the page converted to markdown text."""
        data = {
            "url": kwargs["url"],
            "method": "get",
            "response_type": "markdown",
        }
        response = requests.post(self.request_url, headers=self.headers, json=data)
        return response.text