adding webpage to markdown

This commit is contained in:
teampen
2024-06-10 21:34:53 -04:00
parent d8b8edab08
commit ffe3829cef
6 changed files with 95 additions and 3 deletions

View File

@@ -25,5 +25,6 @@ from .tools import (
YoutubeVideoSearchTool,
SerplyWebSearchTool,
SerplyNewsSearchTool,
SerplyScholarSearchTool
SerplyScholarSearchTool,
SerplyWebpageToMarkdownTool
)

View File

@@ -24,3 +24,4 @@ from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSea
from .serply_api_tool.serply_web_search_tool import SerplyWebSearchTool
from .serply_api_tool.serply_news_search_tool import SerplyNewsSearchTool
from .serply_api_tool.serply_scholar_search_tool import SerplyScholarSearchTool
from .serply_api_tool.serply_web_to_markdown_tool import SerplyWebpageToMarkdownTool

View File

@@ -55,6 +55,45 @@ tool = SerplyScholarSearchTool()
tool = SerplyScholarSearchTool(proxy_location="GB")
```
## Web Page To Markdown
The following example demonstrates how to initialize the tool and fetch a web page and convert it to markdown:
```python
from crewai_tools import SerplyWebpageToMarkdownTool
# Initialize the tool for converting web pages to markdown
tool = SerplyWebpageToMarkdownTool()
# change proxy location to Germany (DE)
tool = SerplyWebpageToMarkdownTool(proxy_location="DE")
```
## Combining Multiple Tools
The following example demonstrates performing a Google search to find relevant articles. Then, convert those articles to markdown format for easier extraction of key points.
```python
from crewai import Agent
from crewai_tools import SerplyWebSearchTool, SerplyWebpageToMarkdownTool
search_tool = SerplyWebSearchTool()
convert_to_markdown = SerplyWebpageToMarkdownTool()
# Creating a senior researcher agent with memory and verbose mode
researcher = Agent(
role='Senior Researcher',
goal='Uncover groundbreaking technologies in {topic}',
verbose=True,
memory=True,
backstory=(
"Driven by curiosity, you're at the forefront of"
"innovation, eager to explore and share knowledge that could change"
"the world."
),
tools=[search_tool, convert_to_markdown],
allow_delegation=True
)
```
## Steps to Get Started
To effectively use the `SerplyApiTool`, follow these steps:

View File

@@ -5,6 +5,7 @@ from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class SerplyNewsSearchToolSchema(BaseModel):
"""Input for Serply News Search."""
search_query: str = Field(..., description="Mandatory search query you want to use to fetch news articles")

View File

@@ -5,6 +5,7 @@ from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class SerplyScholarSearchToolSchema(BaseModel):
"""Input for Serply Scholar Search."""
search_query: str = Field(..., description="Mandatory search query you want to use to fetch scholarly literature")
@@ -41,8 +42,8 @@ class SerplyScholarSearchTool(BaseTool):
}
def _run(
self,
**kwargs: Any,
self,
**kwargs: Any,
) -> Any:
query_payload = {
"hl": self.hl

View File

@@ -0,0 +1,49 @@
import os
import requests
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.rag.rag_tool import RagTool
class SerplyWebpageToMarkdownToolSchema(BaseModel):
    """Input for Serply Webpage To Markdown."""
    url: str = Field(..., description="Mandatory url you want to use to fetch and convert to markdown")
class SerplyWebpageToMarkdownTool(RagTool):
    """Fetch a web page through the Serply request API and return it as markdown.

    Requires the ``SERPLY_API_KEY`` environment variable to be set; reading it
    is deferred to ``__init__`` so the module can be imported without a key.
    """

    name: str = "Webpage to Markdown"
    # Fixed grammar — original read "A tool to perform convert a webpage ...".
    description: str = "A tool to convert a webpage to markdown to make it easier for LLMs to understand"
    args_schema: Type[BaseModel] = SerplyWebpageToMarkdownToolSchema
    # Serply endpoint that proxies the fetch and performs the conversion.
    request_url: str = "https://api.serply.io/v1/request"
    proxy_location: Optional[str] = "US"
    headers: Optional[dict] = {}

    def __init__(
        self,
        proxy_location: Optional[str] = "US",
        **kwargs,
    ):
        """
        proxy_location: (str): Which country to proxy the page fetch through.
            ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
        """
        super().__init__(**kwargs)
        self.proxy_location = proxy_location
        # Raises KeyError if SERPLY_API_KEY is not set — fail fast at construction.
        self.headers = {
            "X-API-KEY": os.environ["SERPLY_API_KEY"],
            "User-Agent": "crew-tools",
            "X-Proxy-Location": proxy_location,
        }

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        """Fetch ``kwargs['url']`` and return the page converted to markdown text."""
        data = {
            "url": kwargs["url"],
            "method": "get",
            "response_type": "markdown",
        }
        response = requests.post(self.request_url, headers=self.headers, json=data)
        return response.text