adding webpage to markdown

2026-01-10 00:28:31 +00:00 · 2024-06-10 21:34:53 -04:00
parent d8b8edab08
commit ffe3829cef
6 changed files with 95 additions and 3 deletions
--- a/src/crewai_tools/init.py
+++ b/src/crewai_tools/init.py
@@ -25,5 +25,6 @@ from .tools import (
 	YoutubeVideoSearchTool,
 	SerplyWebSearchTool,
 	SerplyNewsSearchTool,
-	SerplyScholarSearchTool
+	SerplyScholarSearchTool,
 	SerplyWebpageToMarkdownTool
 )
--- a/src/crewai_tools/tools/init.py
+++ b/src/crewai_tools/tools/init.py
@@ -24,3 +24,4 @@ from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSea
 from .serply_api_tool.serply_web_search_tool import SerplyWebSearchTool
 from .serply_api_tool.serply_news_search_tool import SerplyNewsSearchTool
 from .serply_api_tool.serply_scholar_search_tool import SerplyScholarSearchTool
 from .serply_api_tool.serply_web_to_markdown_tool import SerplyWebpageToMarkdownTool
--- a/src/crewai_tools/tools/serply_api_tool/README.md
+++ b/src/crewai_tools/tools/serply_api_tool/README.md
@@ -55,6 +55,45 @@ tool = SerplyScholarSearchTool()
 tool = SerplyScholarSearchTool(proxy_location="GB")
 ```
 ## Web Page To Markdown
 The following example demonstrates how to initialize the tool, which fetches a web page and converts it to markdown:
 ```python
 from crewai_tools import SerplyWebpageToMarkdownTool
 # Initialize the tool for converting web pages to markdown
 tool = SerplyWebpageToMarkdownTool()
 # Change the proxy location used to fetch the page (DE - Germany)
 tool = SerplyWebpageToMarkdownTool(proxy_location="DE")
 ```
 ## Combining Multiple Tools
 The following example demonstrates performing a Google search to find relevant articles and then converting those articles to markdown format for easier extraction of key points.
 ```python
 from crewai import Agent
 from crewai_tools import SerplyWebSearchTool, SerplyWebpageToMarkdownTool
 search_tool = SerplyWebSearchTool()
 convert_to_markdown = SerplyWebpageToMarkdownTool()
 # Creating a senior researcher agent with memory and verbose mode
 researcher = Agent(
  role='Senior Researcher',
  goal='Uncover groundbreaking technologies in {topic}',
  verbose=True,
  memory=True,
  backstory=(
    "Driven by curiosity, you're at the forefront of "
    "innovation, eager to explore and share knowledge that could change "
    "the world."
  ),
  tools=[search_tool, convert_to_markdown],
  allow_delegation=True
 )
 ```
 ## Steps to Get Started
 To effectively use the `SerplyApiTool`, follow these steps:
--- a/src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py
+++ b/src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py
@@ -5,6 +5,7 @@ from typing import Type, Any, Optional
 from pydantic.v1 import BaseModel, Field
 from crewai_tools.tools.base_tool import BaseTool
 class SerplyNewsSearchToolSchema(BaseModel):
    """Input for Serply News Search."""
    search_query: str = Field(..., description="Mandatory search query you want to use to fetch news articles")
--- a/src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py
+++ b/src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py
@@ -5,6 +5,7 @@ from typing import Type, Any, Optional
 from pydantic.v1 import BaseModel, Field
 from crewai_tools.tools.base_tool import BaseTool
 class SerplyScholarSearchToolSchema(BaseModel):
    """Input for Serply Scholar Search."""
    search_query: str = Field(..., description="Mandatory search query you want to use to fetch scholarly literature")
--- a/src/crewai_tools/tools/serply_api_tool/serply_web_to_markdown_tool.py
+++ b/src/crewai_tools/tools/serply_api_tool/serply_web_to_markdown_tool.py
@@ -0,0 +1,49 @@
 import os
 import requests
 from urllib.parse import urlencode
 from typing import Type, Any, Optional
 from pydantic.v1 import BaseModel, Field
 from crewai_tools.tools.rag.rag_tool import RagTool
 class SerplyWebpageToMarkdownToolSchema(BaseModel):
     """Input for Serply Webpage to Markdown."""
     # The only argument the tool accepts: the address of the page to fetch.
     url: str = Field(..., description="Mandatory url you want to use to fetch and convert to markdown")
 class SerplyWebpageToMarkdownTool(RagTool):
     """Tool that asks the Serply request endpoint for a webpage rendered as markdown.
     The tool POSTs the target url to ``request_url`` with ``response_type`` set to
     ``"markdown"`` and returns the raw response body as text.
     """
     name: str = "Webpage to Markdown"
     description: str = "A tool to perform convert a webpage to markdown to make it easier for LLMs to understand"
     args_schema: Type[BaseModel] = SerplyWebpageToMarkdownToolSchema
     request_url: str = "https://api.serply.io/v1/request"
     proxy_location: Optional[str] = "US"
     headers: Optional[dict] = {}
     # Seconds to wait for the Serply API before giving up; None waits forever
     # (the behaviour before this field existed).
     request_timeout: Optional[float] = 60
     def __init__(
             self,
             proxy_location: Optional[str] = "US",
             **kwargs
     ):
         """
             Build the tool and the headers sent with every request.
             proxy_location: (str): Country code sent as the X-Proxy-Location header.
                  ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
             Raises KeyError when the SERPLY_API_KEY environment variable is not set.
         """
         super().__init__(**kwargs)
         self.proxy_location = proxy_location
         self.headers = {
             "X-API-KEY": os.environ["SERPLY_API_KEY"],
             "User-Agent": "crew-tools",
             "X-Proxy-Location": proxy_location
         }
     def _run(
             self,
             **kwargs: Any,
     ) -> Any:
         """
             Request the page given by kwargs["url"] as markdown and return the response text.
             The response body is returned as-is, whatever the HTTP status code.
             Raises KeyError when "url" is missing from kwargs, and
             requests.exceptions.Timeout when no answer arrives within request_timeout seconds.
         """
         payload = {
             "url": kwargs["url"],
             "method": "get",
             "response_type": "markdown"
         }
         # Without a timeout, requests blocks indefinitely on a stalled connection.
         response = requests.post(
             self.request_url,
             headers=self.headers,
             json=payload,
             timeout=self.request_timeout,
         )
         return response.text