Merge branch 'main' into add-more-parameters-to-serperdev-search-payload

This commit is contained in:
João Moura
2024-07-14 13:48:54 -07:00
committed by GitHub
47 changed files with 1690 additions and 52 deletions

View File

@@ -1,26 +1,39 @@
from .tools.base_tool import BaseTool, Tool, tool
from .tools import (
BrowserbaseLoadTool,
CodeDocsSearchTool,
CSVSearchTool,
DirectorySearchTool,
DOCXSearchTool,
DirectoryReadTool,
EXASearchTool,
FileReadTool,
GithubSearchTool,
SerperDevTool,
TXTSearchTool,
JSONSearchTool,
MDXSearchTool,
PDFSearchTool,
PGSearchTool,
RagTool,
ScrapeElementFromWebsiteTool,
ScrapeWebsiteTool,
SeleniumScrapingTool,
WebsiteSearchTool,
XMLSearchTool,
YoutubeChannelSearchTool,
YoutubeVideoSearchTool,
)
BrowserbaseLoadTool,
CodeDocsSearchTool,
CodeInterpreterTool,
ComposioTool,
CSVSearchTool,
DirectoryReadTool,
DirectorySearchTool,
DOCXSearchTool,
EXASearchTool,
FileReadTool,
FirecrawlCrawlWebsiteTool,
FirecrawlScrapeWebsiteTool,
FirecrawlSearchTool,
GithubSearchTool,
JSONSearchTool,
LlamaIndexTool,
MDXSearchTool,
MultiOnTool,
PDFSearchTool,
PGSearchTool,
RagTool,
ScrapeElementFromWebsiteTool,
ScrapflyScrapeWebsiteTool,
ScrapeWebsiteTool,
SeleniumScrapingTool,
SerperDevTool,
SerplyWebSearchTool,
SerplyNewsSearchTool,
SerplyScholarSearchTool,
SerplyWebpageToMarkdownTool,
SerplyJobSearchTool,
TXTSearchTool,
WebsiteSearchTool,
XMLSearchTool,
YoutubeChannelSearchTool,
YoutubeVideoSearchTool
)
from .tools.base_tool import BaseTool, Tool, tool

View File

@@ -0,0 +1,32 @@
from typing import Any, Optional
from embedchain import App
from crewai_tools.tools.rag.rag_tool import Adapter
class PDFEmbedchainAdapter(Adapter):
embedchain_app: App
summarize: bool = False
src: Optional[str] = None
def query(self, question: str) -> str:
where = (
{"app_id": self.embedchain_app.config.id, "source": self.src}
if self.src
else None
)
result, sources = self.embedchain_app.query(
question, citations=True, dry_run=(not self.summarize), where=where
)
if self.summarize:
return result
return "\n\n".join([source[0] for source in sources])
def add(
self,
*args: Any,
**kwargs: Any,
) -> None:
self.src = args[0] if args else None
self.embedchain_app.add(*args, **kwargs)

View File

@@ -1,23 +1,40 @@
from .browserbase_load_tool.browserbase_load_tool import BrowserbaseLoadTool
from .code_docs_search_tool.code_docs_search_tool import CodeDocsSearchTool
from .code_interpreter_tool.code_interpreter_tool import CodeInterpreterTool
from .composio_tool.composio_tool import ComposioTool
from .csv_search_tool.csv_search_tool import CSVSearchTool
from .directory_search_tool.directory_search_tool import DirectorySearchTool
from .directory_read_tool.directory_read_tool import DirectoryReadTool
from .directory_search_tool.directory_search_tool import DirectorySearchTool
from .docx_search_tool.docx_search_tool import DOCXSearchTool
from .exa_tools.exa_search_tool import EXASearchTool
from .file_read_tool.file_read_tool import FileReadTool
from .firecrawl_crawl_website_tool.firecrawl_crawl_website_tool import FirecrawlCrawlWebsiteTool
from .firecrawl_scrape_website_tool.firecrawl_scrape_website_tool import FirecrawlScrapeWebsiteTool
from .firecrawl_search_tool.firecrawl_search_tool import FirecrawlSearchTool
from .github_search_tool.github_search_tool import GithubSearchTool
from .serper_dev_tool.serper_dev_tool import SerperDevTool
from .txt_search_tool.txt_search_tool import TXTSearchTool
from .json_search_tool.json_search_tool import JSONSearchTool
from .llamaindex_tool.llamaindex_tool import LlamaIndexTool
from .mdx_seach_tool.mdx_search_tool import MDXSearchTool
from .multion_tool.multion_tool import MultiOnTool
from .pdf_search_tool.pdf_search_tool import PDFSearchTool
from .pg_seach_tool.pg_search_tool import PGSearchTool
from .rag.rag_tool import RagTool
from .scrape_element_from_website.scrape_element_from_website import ScrapeElementFromWebsiteTool
from .scrape_element_from_website.scrape_element_from_website import (
ScrapeElementFromWebsiteTool,
)
from .scrape_website_tool.scrape_website_tool import ScrapeWebsiteTool
from .scrapfly_scrape_website_tool.scrapfly_scrape_website_tool import ScrapflyScrapeWebsiteTool
from .selenium_scraping_tool.selenium_scraping_tool import SeleniumScrapingTool
from .serper_dev_tool.serper_dev_tool import SerperDevTool
from .serply_api_tool.serply_web_search_tool import SerplyWebSearchTool
from .serply_api_tool.serply_news_search_tool import SerplyNewsSearchTool
from .serply_api_tool.serply_scholar_search_tool import SerplyScholarSearchTool
from .serply_api_tool.serply_webpage_to_markdown_tool import SerplyWebpageToMarkdownTool
from .serply_api_tool.serply_job_search_tool import SerplyJobSearchTool
from .txt_search_tool.txt_search_tool import TXTSearchTool
from .website_search.website_search_tool import WebsiteSearchTool
from .xml_search_tool.xml_search_tool import XMLSearchTool
from .youtube_channel_search_tool.youtube_channel_search_tool import YoutubeChannelSearchTool
from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool
from .youtube_channel_search_tool.youtube_channel_search_tool import (
YoutubeChannelSearchTool,
)
from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool

View File

@@ -22,6 +22,8 @@ class BaseTool(BaseModel, ABC):
"""Flag to check if the description has been updated."""
cache_function: Optional[Callable] = lambda _args, _result: True
"""Function that will be used to determine if the tool should be cached, should return a boolean. If None, the tool will be cached."""
result_as_answer: bool = False
"""Flag to check if the tool should be the final agent answer."""
@validator("args_schema", always=True, pre=True)
def _default_args_schema(cls, v: Type[V1BaseModel]) -> Type[V1BaseModel]:
@@ -85,12 +87,15 @@ class BaseTool(BaseModel, ABC):
def _generate_description(self):
args = []
args_description = []
for arg, attribute in self.args_schema.schema()["properties"].items():
if "type" in attribute:
args.append(f"{arg}: '{attribute['type']}'")
if "description" in attribute:
args_description.append(f"{arg}: '{attribute['description']}'")
description = self.description.replace("\n", " ")
self.description = f"{self.name}({', '.join(args)}) - {description}"
self.description = f"{self.name}({', '.join(args)}) - {description} {', '.join(args_description)}"
class Tool(BaseTool):

View File

@@ -57,4 +57,4 @@ class CodeDocsSearchTool(RagTool):
search_query: str,
**kwargs: Any,
) -> Any:
return super()._run(query=search_query)
return super()._run(query=search_query, **kwargs)

View File

@@ -0,0 +1,14 @@
FROM python:3.11-slim
# Install common utilities
RUN apt-get update && apt-get install -y \
build-essential \
curl \
wget \
software-properties-common
# Clean up
RUN apt-get clean && rm -rf /var/lib/apt/lists/*
# Set the working directory
WORKDIR /workspace

View File

@@ -0,0 +1,29 @@
# CodeInterpreterTool
## Description
This tool gives the Agent the ability to run Python3 code generated by the Agent itself. The code is executed in a sandboxed environment, so it is safe to run any code.
It is incredibly useful since it allows the Agent to generate code, run it in the same environment, get the result, and use it to make decisions.
## Requirements
- Docker
## Installation
Install the crewai_tools package
```shell
pip install 'crewai[tools]'
```
## Example
Remember that when using this tool, the code must be generated by the Agent itself and must be valid Python3 code. The first run will take some time because the Docker image needs to be built.
```python
from crewai_tools import CodeInterpreterTool
Agent(
...
tools=[CodeInterpreterTool()],
)
```
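For a fuller picture, here is a minimal sketch of wiring the tool into an agent, task, and crew; the role, goal, backstory, and task text are illustrative assumptions, not part of the tool itself.
```python
from crewai import Agent, Task, Crew
from crewai_tools import CodeInterpreterTool

# Hypothetical agent that writes and executes its own Python3 code
coder = Agent(
    role="Python Data Analyst",
    goal="Answer questions by writing and running Python3 code",
    backstory="An analyst who verifies every answer by executing code.",
    tools=[CodeInterpreterTool()],
    verbose=True,
)

# Hypothetical task; the agent is expected to print the final result
analysis = Task(
    description="Compute the 20th Fibonacci number and print it",
    expected_output="The 20th Fibonacci number",
    agent=coder,
)

crew = Crew(agents=[coder], tasks=[analysis])
crew.kickoff()
```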

View File

@@ -0,0 +1,94 @@
import importlib.util
import os
from typing import List, Optional, Type
import docker
from crewai_tools.tools.base_tool import BaseTool
from pydantic.v1 import BaseModel, Field
class CodeInterpreterSchema(BaseModel):
"""Input for CodeInterpreterTool."""
code: str = Field(
...,
description="Python3 code used to be interpreted in the Docker container. ALWAYS PRINT the final result and the output of the code",
)
libraries_used: List[str] = Field(
...,
description="List of libraries used in the code with proper installing names separated by commas. Example: numpy,pandas,beautifulsoup4",
)
class CodeInterpreterTool(BaseTool):
name: str = "Code Interpreter"
description: str = "Interprets Python3 code strings with a final print statement."
args_schema: Type[BaseModel] = CodeInterpreterSchema
code: Optional[str] = None
@staticmethod
def _get_installed_package_path():
spec = importlib.util.find_spec("crewai_tools")
return os.path.dirname(spec.origin)
def _verify_docker_image(self) -> None:
"""
Verify if the Docker image is available
"""
image_tag = "code-interpreter:latest"
client = docker.from_env()
try:
client.images.get(image_tag)
except docker.errors.ImageNotFound:
package_path = self._get_installed_package_path()
dockerfile_path = os.path.join(package_path, "tools/code_interpreter_tool")
if not os.path.exists(dockerfile_path):
raise FileNotFoundError(f"Dockerfile not found in {dockerfile_path}")
client.images.build(
path=dockerfile_path,
tag=image_tag,
rm=True,
)
def _run(self, **kwargs) -> str:
code = kwargs.get("code", self.code)
libraries_used = kwargs.get("libraries_used", [])
return self.run_code_in_docker(code, libraries_used)
def _install_libraries(
self, container: docker.models.containers.Container, libraries: List[str]
) -> None:
"""
Install missing libraries in the Docker container
"""
for library in libraries:
container.exec_run(f"pip install {library}")
def _init_docker_container(self) -> docker.models.containers.Container:
client = docker.from_env()
return client.containers.run(
"code-interpreter",
detach=True,
tty=True,
working_dir="/workspace",
name="code-interpreter",
)
def run_code_in_docker(self, code: str, libraries_used: List[str]) -> str:
self._verify_docker_image()
container = self._init_docker_container()
self._install_libraries(container, libraries_used)
cmd_to_run = f'python3 -c "{code}"'
exec_result = container.exec_run(cmd_to_run)
container.stop()
container.remove()
if exec_result.exit_code != 0:
return f"Something went wrong while running the code: \n{exec_result.output.decode('utf-8')}"
return exec_result.output.decode("utf-8")

View File

@@ -0,0 +1,72 @@
# ComposioTool Documentation
## Description
This tool is a wrapper around the Composio toolset and gives your agent access to a wide variety of tools from the Composio SDK.
## Installation
To incorporate this tool into your project, follow the installation instructions below:
```shell
pip install composio-core
pip install 'crewai[tools]'
```
After the installation is complete, either run `composio login` or export your Composio API key as `COMPOSIO_API_KEY`.
## Example
The following example demonstrates how to initialize the tool and execute a GitHub action:
1. Initialize toolset
```python
from composio import Action, App
from crewai_tools import ComposioTool
from crewai import Agent, Task
tools = [ComposioTool.from_action(action=Action.GITHUB_ACTIVITY_STAR_REPO_FOR_AUTHENTICATED_USER)]
```
If you don't know which action you want to use, use `from_app` with a `tags` filter to get relevant actions
```python
tools = ComposioTool.from_app(App.GITHUB, tags=["important"])
```
or use `use_case` to search for relevant actions
```python
tools = ComposioTool.from_app(App.GITHUB, use_case="Star a github repository")
```
2. Define agent
```python
crewai_agent = Agent(
role="Github Agent",
goal="You take action on Github using Github APIs",
backstory=(
"You are AI agent that is responsible for taking actions on Github "
"on users behalf. You need to take action on Github using Github APIs"
),
verbose=True,
tools=tools,
)
```
3. Execute task
```python
task = Task(
description="Star a repo ComposioHQ/composio on GitHub",
agent=crewai_agent,
expected_output="if the star happened",
)
task.execute()
```
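If you prefer to run the task through a crew rather than calling `task.execute()` directly, a minimal sketch reusing the `crewai_agent` and `task` defined above could look like this:
```python
from crewai import Crew

crew = Crew(agents=[crewai_agent], tasks=[task])
crew.kickoff()
```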
* More detailed list of tools can be found [here](https://app.composio.dev)

View File

@@ -0,0 +1,122 @@
"""
Composio tools wrapper.
"""
import typing as t
import typing_extensions as te
from crewai_tools.tools.base_tool import BaseTool
class ComposioTool(BaseTool):
"""Wrapper for composio tools."""
composio_action: t.Callable
def _run(self, *args: t.Any, **kwargs: t.Any) -> t.Any:
"""Run the composio action with given arguments."""
return self.composio_action(*args, **kwargs)
@staticmethod
def _check_connected_account(tool: t.Any, toolset: t.Any) -> None:
"""Check if connected account is required and if required it exists or not."""
from composio import Action
from composio.client.collections import ConnectedAccountModel
tool = t.cast(Action, tool)
if tool.no_auth:
return
connections = t.cast(
t.List[ConnectedAccountModel],
toolset.client.connected_accounts.get(),
)
if tool.app not in [connection.appUniqueId for connection in connections]:
raise RuntimeError(
f"No connected account found for app `{tool.app}`; "
f"Run `composio add {tool.app}` to fix this"
)
@classmethod
def from_action(
cls,
action: t.Any,
**kwargs: t.Any,
) -> te.Self:
"""Wrap a composio tool as crewAI tool."""
from composio import Action, ComposioToolSet
from composio.constants import DEFAULT_ENTITY_ID
from composio.utils.shared import json_schema_to_model
toolset = ComposioToolSet()
if not isinstance(action, Action):
action = Action(action)
action = t.cast(Action, action)
cls._check_connected_account(
tool=action,
toolset=toolset,
)
(action_schema,) = toolset.get_action_schemas(actions=[action])
schema = action_schema.model_dump(exclude_none=True)
entity_id = kwargs.pop("entity_id", DEFAULT_ENTITY_ID)
def function(**kwargs: t.Any) -> t.Dict:
"""Wrapper function for composio action."""
return toolset.execute_action(
action=Action(schema["name"]),
params=kwargs,
entity_id=entity_id,
)
function.__name__ = schema["name"]
function.__doc__ = schema["description"]
return cls(
name=schema["name"],
description=schema["description"],
args_schema=json_schema_to_model(
action_schema.parameters.model_dump(
exclude_none=True,
)
),
composio_action=function,
**kwargs,
)
@classmethod
def from_app(
cls,
*apps: t.Any,
tags: t.Optional[t.List[str]] = None,
use_case: t.Optional[str] = None,
**kwargs: t.Any,
) -> t.List[te.Self]:
"""Create toolset from an app."""
if len(apps) == 0:
raise ValueError("You need to provide at least one app name")
if use_case is None and tags is None:
raise ValueError("Both `use_case` and `tags` cannot be `None`")
if use_case is not None and tags is not None:
raise ValueError(
"Cannot use both `use_case` and `tags` to filter the actions"
)
from composio import ComposioToolSet
toolset = ComposioToolSet()
if use_case is not None:
return [
cls.from_action(action=action, **kwargs)
for action in toolset.find_actions_by_use_case(*apps, use_case=use_case)
]
return [
cls.from_action(action=action, **kwargs)
for action in toolset.find_actions_by_tags(*apps, tags=tags)
]

View File

@@ -57,4 +57,4 @@ class CSVSearchTool(RagTool):
search_query: str,
**kwargs: Any,
) -> Any:
return super()._run(query=search_query)
return super()._run(query=search_query, **kwargs)

View File

@@ -57,4 +57,4 @@ class DirectorySearchTool(RagTool):
search_query: str,
**kwargs: Any,
) -> Any:
return super()._run(query=search_query)
return super()._run(query=search_query, **kwargs)

View File

@@ -63,4 +63,4 @@ class DOCXSearchTool(RagTool):
docx = kwargs.get("docx")
if docx is not None:
self.add(docx)
return super()._run(query=search_query)
return super()._run(query=search_query, **kwargs)

View File

@@ -0,0 +1,42 @@
# FirecrawlCrawlWebsiteTool
## Description
[Firecrawl](https://firecrawl.dev) is a platform for crawling websites and converting them into clean markdown or structured data.
## Installation
- Get an API key from [firecrawl.dev](https://firecrawl.dev) and set it in environment variables (`FIRECRAWL_API_KEY`).
- Install the [Firecrawl SDK](https://github.com/mendableai/firecrawl) along with the `crewai[tools]` package:
```
pip install firecrawl-py 'crewai[tools]'
```
## Example
Utilize the FirecrawlCrawlWebsiteTool as follows to allow your agent to crawl websites:
```python
from crewai_tools import FirecrawlCrawlWebsiteTool
tool = FirecrawlCrawlWebsiteTool(url='firecrawl.dev')
```
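Below is a sketch of calling the tool directly with the optional `crawler_options` and `page_options` documented in the Arguments section; the option values are illustrative assumptions and `FIRECRAWL_API_KEY` is assumed to be set in the environment.
```python
from crewai_tools import FirecrawlCrawlWebsiteTool

tool = FirecrawlCrawlWebsiteTool()  # assumes FIRECRAWL_API_KEY is set

# Illustrative options; see the Arguments section below for the full list
result = tool._run(
    url="https://firecrawl.dev",
    crawler_options={"maxDepth": 2, "limit": 10, "returnOnlyUrls": False},
    page_options={"onlyMainContent": True},
)
print(result)
```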
## Arguments
- `api_key`: Optional. Specifies the Firecrawl API key. Defaults to the `FIRECRAWL_API_KEY` environment variable.
- `url`: The base URL to start crawling from.
- `page_options`: Optional.
- `onlyMainContent`: Optional. Only return the main content of the page excluding headers, navs, footers, etc.
- `includeHtml`: Optional. Include the raw HTML content of the page. Will output a html key in the response.
- `crawler_options`: Optional. Options for controlling the crawling behavior.
- `includes`: Optional. URL patterns to include in the crawl.
- `excludes`: Optional. URL patterns to exclude from the crawl.
- `generateImgAltText`: Optional. Generate alt text for images using LLMs (requires a paid plan).
- `returnOnlyUrls`: Optional. If true, returns only the URLs as a list in the crawl status. Note: the response will be a list of URLs inside the data, not a list of documents.
- `maxDepth`: Optional. Maximum depth to crawl. Depth 1 is the base URL, depth 2 includes the base URL and its direct children, and so on.
- `mode`: Optional. The crawling mode to use. Fast mode crawls 4x faster on websites without a sitemap but may not be as accurate and shouldn't be used on heavily JavaScript-rendered websites.
- `limit`: Optional. Maximum number of pages to crawl.
- `timeout`: Optional. Timeout in milliseconds for the crawling operation.

View File

@@ -0,0 +1,33 @@
from typing import Optional, Any, Type, Dict, List
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class FirecrawlCrawlWebsiteToolSchema(BaseModel):
url: str = Field(description="Website URL")
crawler_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for crawling")
page_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for page")
class FirecrawlCrawlWebsiteTool(BaseTool):
name: str = "Firecrawl web crawl tool"
description: str = "Crawl webpages using Firecrawl and return the contents"
args_schema: Type[BaseModel] = FirecrawlCrawlWebsiteToolSchema
api_key: Optional[str] = None
firecrawl: Optional[Any] = None
def __init__(self, api_key: Optional[str] = None, **kwargs):
super().__init__(**kwargs)
try:
from firecrawl import FirecrawlApp # type: ignore
except ImportError:
raise ImportError(
"`firecrawl` package not found, please run `pip install firecrawl-py`"
)
self.firecrawl = FirecrawlApp(api_key=api_key)
def _run(self, url: str, crawler_options: Optional[Dict[str, Any]] = None, page_options: Optional[Dict[str, Any]] = None):
options = {
"crawlerOptions": crawler_options,
"pageOptions": page_options
}
return self.firecrawl.crawl_url(url, options)

View File

@@ -0,0 +1,38 @@
# FirecrawlScrapeWebsiteTool
## Description
[Firecrawl](https://firecrawl.dev) is a platform for crawling websites and converting them into clean markdown or structured data.
## Installation
- Get an API key from [firecrawl.dev](https://firecrawl.dev) and set it in environment variables (`FIRECRAWL_API_KEY`).
- Install the [Firecrawl SDK](https://github.com/mendableai/firecrawl) along with the `crewai[tools]` package:
```
pip install firecrawl-py 'crewai[tools]'
```
## Example
Utilize the FirecrawlScrapeWebsiteTool as follows to allow your agent to scrape websites:
```python
from crewai_tools import FirecrawlScrapeWebsiteTool
tool = FirecrawlScrapeWebsiteTool(url='firecrawl.dev')
```
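Below is a sketch of calling the tool directly with the optional `page_options` and `extractor_options` documented in the Arguments section; the extraction prompt and option values are illustrative assumptions and `FIRECRAWL_API_KEY` is assumed to be set in the environment.
```python
from crewai_tools import FirecrawlScrapeWebsiteTool

tool = FirecrawlScrapeWebsiteTool()  # assumes FIRECRAWL_API_KEY is set

# Illustrative LLM-extraction options; see the Arguments section below
result = tool._run(
    url="https://firecrawl.dev",
    page_options={"onlyMainContent": True},
    extractor_options={
        "mode": "llm-extraction",
        "extractionPrompt": "Extract the product name and a one-line summary",
    },
    timeout=30000,
)
print(result)
```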
## Arguments
- `api_key`: Optional. Specifies the Firecrawl API key. Defaults to the `FIRECRAWL_API_KEY` environment variable.
- `url`: The URL to scrape.
- `page_options`: Optional.
- `onlyMainContent`: Optional. Only return the main content of the page excluding headers, navs, footers, etc.
- `includeHtml`: Optional. Include the raw HTML content of the page. Will output a html key in the response.
- `extractor_options`: Optional. Options for LLM-based extraction of structured information from the page content
- `mode`: The extraction mode to use, currently supports 'llm-extraction'
- `extractionPrompt`: Optional. A prompt describing what information to extract from the page
- `extractionSchema`: Optional. The schema for the data to be extracted
- `timeout`: Optional. Timeout in milliseconds for the request

View File

@@ -0,0 +1,35 @@
from typing import Optional, Any, Type, Dict
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class FirecrawlScrapeWebsiteToolSchema(BaseModel):
url: str = Field(description="Website URL")
page_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for page scraping")
extractor_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for data extraction")
timeout: Optional[int] = Field(default=None, description="Timeout for the scraping operation")
class FirecrawlScrapeWebsiteTool(BaseTool):
name: str = "Firecrawl web scrape tool"
description: str = "Scrape webpages url using Firecrawl and return the contents"
args_schema: Type[BaseModel] = FirecrawlScrapeWebsiteToolSchema
api_key: Optional[str] = None
firecrawl: Optional[Any] = None
def __init__(self, api_key: Optional[str] = None, **kwargs):
super().__init__(**kwargs)
try:
from firecrawl import FirecrawlApp # type: ignore
except ImportError:
raise ImportError(
"`firecrawl` package not found, please run `pip install firecrawl-py`"
)
self.firecrawl = FirecrawlApp(api_key=api_key)
def _run(self, url: str, page_options: Optional[Dict[str, Any]] = None, extractor_options: Optional[Dict[str, Any]] = None, timeout: Optional[int] = None):
options = {
"pageOptions": page_options,
"extractorOptions": extractor_options,
"timeout": timeout
}
return self.firecrawl.scrape_url(url, options)

View File

@@ -0,0 +1,35 @@
# FirecrawlSearchTool
## Description
[Firecrawl](https://firecrawl.dev) is a platform for crawling websites and converting them into clean markdown or structured data.
## Installation
- Get an API key from [firecrawl.dev](https://firecrawl.dev) and set it in environment variables (`FIRECRAWL_API_KEY`).
- Install the [Firecrawl SDK](https://github.com/mendableai/firecrawl) along with the `crewai[tools]` package:
```
pip install firecrawl-py 'crewai[tools]'
```
## Example
Utilize the FirecrawlSearchTool as follows to allow your agent to search the web:
```python
from crewai_tools import FirecrawlSearchTool
tool = FirecrawlSearchTool(query='what is firecrawl?')
```
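Below is a sketch of calling the tool directly with the optional page and search options; note that the `_run` signature added in this PR names the second options argument `result_options` (the schema and the Arguments section call it `search_options`), so the exact keyword here follows the code. The option values are illustrative assumptions and `FIRECRAWL_API_KEY` is assumed to be set in the environment.
```python
from crewai_tools import FirecrawlSearchTool

tool = FirecrawlSearchTool()  # assumes FIRECRAWL_API_KEY is set

# Illustrative options; `result_options` matches the `_run` signature in this PR
result = tool._run(
    query="what is firecrawl?",
    page_options={"fetchPageContent": True, "onlyMainContent": True},
    result_options={"limit": 5},
)
print(result)
```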
## Arguments
- `api_key`: Optional. Specifies the Firecrawl API key. Defaults to the `FIRECRAWL_API_KEY` environment variable.
- `query`: The search query string to be used for searching.
- `page_options`: Optional. Options for result formatting.
- `onlyMainContent`: Optional. Only return the main content of the page excluding headers, navs, footers, etc.
- `includeHtml`: Optional. Include the raw HTML content of the page. Will output a html key in the response.
- `fetchPageContent`: Optional. Fetch the full content of the page.
- `search_options`: Optional. Options for controlling the crawling behavior.
- `limit`: Optional. Maximum number of pages to crawl.

View File

@@ -0,0 +1,33 @@
from typing import Optional, Any, Type, Dict, List
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class FirecrawlSearchToolSchema(BaseModel):
query: str = Field(description="Search query")
page_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for result formatting")
search_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for searching")
class FirecrawlSearchTool(BaseTool):
name: str = "Firecrawl web search tool"
description: str = "Search webpages using Firecrawl and return the results"
args_schema: Type[BaseModel] = FirecrawlSearchToolSchema
api_key: Optional[str] = None
firecrawl: Optional[Any] = None
def __init__(self, api_key: Optional[str] = None, **kwargs):
super().__init__(**kwargs)
try:
from firecrawl import FirecrawlApp # type: ignore
except ImportError:
raise ImportError(
"`firecrawl` package not found, please run `pip install firecrawl-py`"
)
self.firecrawl = FirecrawlApp(api_key=api_key)
def _run(self, query: str, page_options: Optional[Dict[str, Any]] = None, result_options: Optional[Dict[str, Any]] = None):
options = {
"pageOptions": page_options,
"resultOptions": result_options
}
return self.firecrawl.search(query, options)

View File

@@ -68,4 +68,4 @@ class GithubSearchTool(RagTool):
search_query: str,
**kwargs: Any,
) -> Any:
return super()._run(query=search_query)
return super()._run(query=search_query, **kwargs)

View File

@@ -57,4 +57,4 @@ class JSONSearchTool(RagTool):
search_query: str,
**kwargs: Any,
) -> Any:
return super()._run(query=search_query)
return super()._run(query=search_query, **kwargs)

View File

@@ -0,0 +1,53 @@
# LlamaIndexTool Documentation
## Description
This tool is designed to be a general wrapper around LlamaIndex tools and query engines, enabling you to leverage LlamaIndex resources
for RAG/agentic pipelines as tools to plug into CrewAI agents.
## Installation
To incorporate this tool into your project, follow the installation instructions below:
```shell
pip install 'crewai[tools]'
```
## Example
The following example demonstrates how to initialize the tool and execute a search with a given query:
```python
from crewai_tools import LlamaIndexTool
# Initialize the tool from a LlamaIndex Tool
## Example 1: Initialize from FunctionTool
from llama_index.core.tools import FunctionTool
your_python_function = lambda ...: ...
og_tool = FunctionTool.from_defaults(your_python_function, name="<name>", description='<description>')
tool = LlamaIndexTool.from_tool(og_tool)
## Example 2: Initialize from LlamaHub Tools
from llama_index.tools.wolfram_alpha import WolframAlphaToolSpec
wolfram_spec = WolframAlphaToolSpec(app_id="<app_id>")
wolfram_tools = wolfram_spec.to_tool_list()
tools = [LlamaIndexTool.from_tool(t) for t in wolfram_tools]
# Initialize Tool from a LlamaIndex Query Engine
## NOTE: LlamaIndex has a lot of query engines, define whatever query engine you want
query_engine = index.as_query_engine()
query_tool = LlamaIndexTool.from_query_engine(
query_engine,
name="Uber 2019 10K Query Tool",
description="Use this tool to lookup the 2019 Uber 10K Annual Report"
)
```
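Once wrapped, a `LlamaIndexTool` can be attached to a crewAI agent like any other tool. A minimal sketch reusing the `query_tool` defined above follows; the agent's role, backstory, and task text are illustrative assumptions.
```python
from crewai import Agent, Task, Crew

analyst = Agent(
    role="Financial Analyst",
    goal="Answer questions about Uber's 2019 10K filing",
    backstory="An analyst who relies on the wrapped query engine for evidence.",
    tools=[query_tool],
    verbose=True,
)

report = Task(
    description="Summarize Uber's 2019 revenue drivers",
    expected_output="A short summary grounded in the 10K",
    agent=analyst,
)

Crew(agents=[analyst], tasks=[report]).kickoff()
```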
## Steps to Get Started
To effectively use the `LlamaIndexTool`, follow these steps:
1. **Install CrewAI**: Confirm that the `crewai[tools]` package is installed in your Python environment.
2. **Install and use LlamaIndex**: Follow the LlamaIndex documentation (https://docs.llamaindex.ai/) to set up a RAG/agent pipeline.

View File

@@ -0,0 +1,84 @@
import os
import json
import requests
from typing import Type, Any, cast, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class LlamaIndexTool(BaseTool):
"""Tool to wrap LlamaIndex tools/query engines."""
llama_index_tool: Any
def _run(
self,
*args: Any,
**kwargs: Any,
) -> Any:
"""Run tool."""
from llama_index.core.tools import BaseTool as LlamaBaseTool
tool = cast(LlamaBaseTool, self.llama_index_tool)
return tool(*args, **kwargs)
@classmethod
def from_tool(
cls,
tool: Any,
**kwargs: Any
) -> "LlamaIndexTool":
from llama_index.core.tools import BaseTool as LlamaBaseTool
if not isinstance(tool, LlamaBaseTool):
raise ValueError(f"Expected a LlamaBaseTool, got {type(tool)}")
tool = cast(LlamaBaseTool, tool)
if tool.metadata.fn_schema is None:
raise ValueError("The LlamaIndex tool does not have an fn_schema specified.")
args_schema = cast(Type[BaseModel], tool.metadata.fn_schema)
return cls(
name=tool.metadata.name,
description=tool.metadata.description,
args_schema=args_schema,
llama_index_tool=tool,
**kwargs
)
@classmethod
def from_query_engine(
cls,
query_engine: Any,
name: Optional[str] = None,
description: Optional[str] = None,
return_direct: bool = False,
**kwargs: Any
) -> "LlamaIndexTool":
from llama_index.core.query_engine import BaseQueryEngine
from llama_index.core.tools import QueryEngineTool
if not isinstance(query_engine, BaseQueryEngine):
raise ValueError(f"Expected a BaseQueryEngine, got {type(query_engine)}")
# NOTE: by default the schema expects an `input` variable. However this
# confuses crewAI so we are renaming to `query`.
class QueryToolSchema(BaseModel):
"""Schema for query tool."""
query: str = Field(..., description="Search query for the query tool.")
# NOTE: setting `resolve_input_errors` to True is important because the schema expects `input` but we are using `query`
query_engine_tool = QueryEngineTool.from_defaults(
query_engine,
name=name,
description=description,
return_direct=return_direct,
resolve_input_errors=True,
)
# HACK: we are replacing the schema with our custom schema
query_engine_tool.metadata.fn_schema = QueryToolSchema
return cls.from_tool(
query_engine_tool,
**kwargs
)

View File

@@ -57,4 +57,4 @@ class MDXSearchTool(RagTool):
search_query: str,
**kwargs: Any,
) -> Any:
return super()._run(query=search_query)
return super()._run(query=search_query, **kwargs)

View File

@@ -0,0 +1,54 @@
# MultiOnTool Documentation
## Description
The MultiOnTool, integrated within the crewai_tools package, empowers CrewAI agents with the capability to navigate and interact with the web through natural language instructions. Leveraging the Multion API, this tool facilitates seamless web browsing, making it an essential asset for projects requiring dynamic web data interaction.
## Installation
Ensure the `crewai[tools]` package is installed in your environment to use the MultiOnTool. If it's not already installed, you can add it using the command below:
```shell
pip install 'crewai[tools]'
```
## Example
The following example demonstrates how to initialize the tool and execute a search with a given query:
```python
from crewai import Agent, Task, Crew
from crewai_tools import MultiOnTool
# Initialize the tool from a MultiOn Tool
multion_tool = MultiOnTool(api_key= "YOUR_MULTION_API_KEY", local=False)
Browser = Agent(
role="Browser Agent",
goal="control web browsers using natural language ",
backstory="An expert browsing agent.",
tools=[multion_tool],
verbose=True,
)
# example task to search and summarize news
browse = Task(
description="Summarize the top 3 trending AI News headlines",
expected_output="A summary of the top 3 trending AI News headlines",
agent=Browser,
)
crew = Crew(agents=[Browser], tasks=[browse])
crew.kickoff()
```
## Arguments
- `api_key`: Specifies the MultiOn API key.
- `local`: Set the `local` flag to `True` to run the agent locally on your browser. Make sure the MultiOn browser extension is installed and API Enabled is checked.
- `max_steps`: Optional. Sets the maximum number of steps the MultiOn agent can take for a command. A configuration sketch combining these arguments follows this list.
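In the sketch below, the API key is a placeholder and `local=True` assumes the MultiOn browser extension is installed with API Enabled checked, as described in the steps that follow.
```python
from crewai_tools import MultiOnTool

# Placeholder key; local=True requires the MultiOn browser extension
multion_tool = MultiOnTool(
    api_key="YOUR_MULTION_API_KEY",
    local=True,
    max_steps=10,
)
```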
## Steps to Get Started
To effectively use the `MultiOnTool`, follow these steps:
1. **Install CrewAI**: Confirm that the `crewai[tools]` package is installed in your Python environment.
2. **Install and use MultiOn**: Follow MultiOn documentation for installing the MultiOn Browser Extension (https://docs.multion.ai/learn/browser-extension).
3. **Enable API Usage**: Click on the MultiOn extension in the extensions folder of your browser (not the hovering MultiOn icon on the web page) to open the extension configurations. Click the API Enabled toggle to enable the API

View File

@@ -0,0 +1,29 @@
import os
from crewai import Agent, Crew, Task
from multion_tool import MultiOnTool
os.environ["OPENAI_API_KEY"] = "Your Key"
multion_browse_tool = MultiOnTool(api_key="Your Key")
# Create a new agent
Browser = Agent(
role="Browser Agent",
goal="control web browsers using natural language ",
backstory="An expert browsing agent.",
tools=[multion_browse_tool],
verbose=True,
)
# Define tasks
browse = Task(
description="Summarize the top 3 trending AI News headlines",
expected_output="A summary of the top 3 trending AI News headlines",
agent=Browser,
)
crew = Crew(agents=[Browser], tasks=[browse])
crew.kickoff()

View File

@@ -0,0 +1,65 @@
"""Multion tool spec."""
from typing import Any, Optional
from crewai_tools.tools.base_tool import BaseTool
class MultiOnTool(BaseTool):
"""Tool to wrap MultiOn Browse Capabilities."""
name: str = "Multion Browse Tool"
description: str = """Multion gives the ability for LLMs to control web browsers using natural language instructions.
If the status is 'CONTINUE', reissue the same instruction to continue execution
"""
multion: Optional[Any] = None
session_id: Optional[str] = None
local: bool = False
max_steps: int = 3
def __init__(
self,
api_key: Optional[str] = None,
local: bool = False,
max_steps: int = 3,
**kwargs,
):
super().__init__(**kwargs)
try:
from multion.client import MultiOn # type: ignore
except ImportError:
raise ImportError(
"`multion` package not found, please run `pip install multion`"
)
self.session_id = None
self.local = local
self.multion = MultiOn(api_key=api_key)
self.max_steps = max_steps
def _run(
self,
cmd: str,
*args: Any,
**kwargs: Any,
) -> str:
"""
Run the Multion client with the given command.
Args:
cmd (str): The detailed and specific natural language instruction for web browsing
*args (Any): Additional arguments to pass to the Multion client
**kwargs (Any): Additional keyword arguments to pass to the Multion client
"""
browse = self.multion.browse(
cmd=cmd,
session_id=self.session_id,
local=self.local,
max_steps=self.max_steps,
*args,
**kwargs,
)
self.session_id = browse.session_id
return browse.message + "\n\n STATUS: " + browse.status

View File

@@ -1,6 +1,7 @@
from typing import Any, Optional, Type
from embedchain.models.data_type import DataType
from pydantic import model_validator
from pydantic.v1 import BaseModel, Field
from ..rag.rag_tool import RagTool
@@ -35,6 +36,22 @@ class PDFSearchTool(RagTool):
self.args_schema = FixedPDFSearchToolSchema
self._generate_description()
@model_validator(mode="after")
def _set_default_adapter(self):
if isinstance(self.adapter, RagTool._AdapterPlaceholder):
from embedchain import App
from crewai_tools.adapters.pdf_embedchain_adapter import (
PDFEmbedchainAdapter,
)
app = App.from_config(config=self.config) if self.config else App()
self.adapter = PDFEmbedchainAdapter(
embedchain_app=app, summarize=self.summarize
)
return self
def add(
self,
*args: Any,

View File

@@ -0,0 +1,66 @@
from typing import Any, Optional, Type
from pydantic import BaseModel, Field
from pypdf import PdfReader, PdfWriter, PageObject, ContentStream, NameObject, Font
from pathlib import Path
from crewai_tools.tools.rag.rag_tool import RagTool  # RagTool is subclassed below but was missing from the imports
class PDFTextWritingToolSchema(BaseModel):
"""Input schema for PDFTextWritingTool."""
pdf_path: str = Field(..., description="Path to the PDF file to modify")
text: str = Field(..., description="Text to add to the PDF")
position: tuple = Field(..., description="Tuple of (x, y) coordinates for text placement")
font_size: int = Field(default=12, description="Font size of the text")
font_color: str = Field(default="0 0 0 rg", description="RGB color code for the text")
font_name: Optional[str] = Field(default="F1", description="Font name for standard fonts")
font_file: Optional[str] = Field(None, description="Path to a .ttf font file for custom font usage")
page_number: int = Field(default=0, description="Page number to add text to")
class PDFTextWritingTool(RagTool):
"""A tool to add text to specific positions in a PDF, with custom font support."""
name: str = "PDF Text Writing Tool"
description: str = "A tool that can write text to a specific position in a PDF document, with optional custom font embedding."
args_schema: Type[BaseModel] = PDFTextWritingToolSchema
def run(self, pdf_path: str, text: str, position: tuple, font_size: int, font_color: str,
font_name: str = "F1", font_file: Optional[str] = None, page_number: int = 0, **kwargs) -> str:
reader = PdfReader(pdf_path)
writer = PdfWriter()
if page_number >= len(reader.pages):
return "Page number out of range."
page: PageObject = reader.pages[page_number]
content = ContentStream(page["/Contents"].data, reader)
if font_file:
# Check if the font file exists
if not Path(font_file).exists():
return "Font file does not exist."
# Embed the custom font
font_name = self.embed_font(writer, font_file)
# Prepare text operation with the custom or standard font
x_position, y_position = position
text_operation = f"BT /{font_name} {font_size} Tf {x_position} {y_position} Td ({text}) Tj ET"
content.operations.append([font_color]) # Set color
content.operations.append([text_operation]) # Add text
# Replace old content with new content
page[NameObject("/Contents")] = content
writer.add_page(page)
# Save the new PDF
output_pdf_path = "modified_output.pdf"
with open(output_pdf_path, "wb") as out_file:
writer.write(out_file)
return f"Text added to {output_pdf_path} successfully."
def embed_font(self, writer: PdfWriter, font_file: str) -> str:
"""Embeds a TTF font into the PDF and returns the font name."""
with open(font_file, "rb") as file:
font = Font.true_type(file.read())
font_ref = writer.add_object(font)
return font_ref

View File

@@ -41,4 +41,4 @@ class PGSearchTool(RagTool):
search_query: str,
**kwargs: Any,
) -> Any:
return super()._run(query=search_query)
return super()._run(query=search_query, **kwargs)

View File

@@ -25,8 +25,7 @@ class ScrapeWebsiteTool(BaseTool):
'Accept-Language': 'en-US,en;q=0.9',
'Referer': 'https://www.google.com/',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1',
'Accept-Encoding': 'gzip, deflate, br'
'Upgrade-Insecure-Requests': '1'
}
def __init__(self, website_url: Optional[str] = None, cookies: Optional[dict] = None, **kwargs):

View File

@@ -0,0 +1,57 @@
# ScrapflyScrapeWebsiteTool
## Description
[ScrapFly](https://scrapfly.io/) is a web scraping API with headless browser capabilities, proxies, and anti-bot bypass. It allows for extracting web page data into accessible LLM markdown or text.
## Setup and Installation
1. **Install ScrapFly Python SDK**: The `scrapfly-sdk` Python package is required to use the ScrapFly Web Loader. Install it via pip with the following command:
```bash
pip install scrapfly-sdk
```
2. **API Key**: Register for free from [scrapfly.io/register](https://www.scrapfly.io/register/) to obtain your API key.
## Example Usage
Utilize the ScrapflyScrapeWebsiteTool as follows to retrieve web page data as text, markdown (LLM accessible), or HTML:
```python
from crewai_tools import ScrapflyScrapeWebsiteTool
tool = ScrapflyScrapeWebsiteTool(
api_key="Your ScrapFly API key"
)
result = tool._run(
url="https://web-scraping.dev/products",
scrape_format="markdown",
ignore_scrape_failures=True
)
```
## Additional Arguments
The ScrapflyScrapeWebsiteTool also allows passing a `scrape_config` dictionary (forwarded to ScrapFly's `ScrapeConfig`) for customizing the scrape request. See the [API params documentation](https://scrapfly.io/docs/scrape-api/getting-started) for the full feature details and their API params:
```python
from crewai_tools import ScrapflyScrapeWebsiteTool
tool = ScrapflyScrapeWebsiteTool(
api_key="Your ScrapFly API key"
)
scrapfly_scrape_config = {
"asp": True, # Bypass scraping blocking and solutions, like Cloudflare
"render_js": True, # Enable JavaScript rendering with a cloud headless browser
"proxy_pool": "public_residential_pool", # Select a proxy pool (datacenter or residnetial)
"country": "us", # Select a proxy location
"auto_scroll": True, # Auto scroll the page
"js": "" # Execute custom JavaScript code by the headless browser
}
result = tool._run(
url="https://web-scraping.dev/products",
scrape_format="markdown",
ignore_scrape_failures=True,
scrape_config=scrapfly_scrape_config
)
```

View File

@@ -0,0 +1,47 @@
import logging
from typing import Optional, Any, Type, Dict, Literal
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
logger = logging.getLogger(__file__)
class ScrapflyScrapeWebsiteToolSchema(BaseModel):
url: str = Field(description="Webpage URL")
scrape_format: Optional[Literal["raw", "markdown", "text"]] = Field(default="markdown", description="Webpage extraction format")
scrape_config: Optional[Dict[str, Any]] = Field(default=None, description="Scrapfly request scrape config")
ignore_scrape_failures: Optional[bool] = Field(default=None, description="whether to ignore failures")
class ScrapflyScrapeWebsiteTool(BaseTool):
name: str = "Scrapfly web scraping API tool"
description: str = "Scrape a webpage url using Scrapfly and return its content as markdown or text"
args_schema: Type[BaseModel] = ScrapflyScrapeWebsiteToolSchema
api_key: str = None
scrapfly: Optional[Any] = None
def __init__(self, api_key: str):
super().__init__()
try:
from scrapfly import ScrapflyClient
except ImportError:
raise ImportError(
"`scrapfly` package not found, please run `pip install scrapfly-sdk`"
)
self.scrapfly = ScrapflyClient(key=api_key)
def _run(self, url: str, scrape_format: str = "markdown", scrape_config: Optional[Dict[str, Any]] = None, ignore_scrape_failures: Optional[bool] = None):
from scrapfly import ScrapeApiResponse, ScrapeConfig
scrape_config = scrape_config if scrape_config is not None else {}
try:
response: ScrapeApiResponse = self.scrapfly.scrape(
ScrapeConfig(url, format=scrape_format, **scrape_config)
)
return response.scrape_result["content"]
except Exception as e:
if ignore_scrape_failures:
logger.error(f"Error fetching data from {url}, exception: {e}")
return None
else:
raise e

View File

@@ -5,7 +5,7 @@ from pydantic.v1 import BaseModel, Field
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.chrome.options import Options
from ..base_tool import BaseTool

View File

@@ -6,6 +6,14 @@ from typing import Optional, Type, Any
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
def _save_results_to_file(content: str) -> None:
"""Saves the search results to a file."""
filename = f"search_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
with open(filename, 'w') as file:
file.write(content)
print(f"Results saved to {filename}")
class SerperDevToolSchema(BaseModel):
"""Input for SerperDevTool."""
search_query: str = Field(..., description="Mandatory search query you want to use to search the internet")
@@ -15,26 +23,31 @@ class SerperDevTool(BaseTool):
description: str = "A tool that can be used to search the internet with a search_query."
args_schema: Type[BaseModel] = SerperDevToolSchema
search_url: str = "https://google.serper.dev/search"
n_results: int = 10
country: Optional[str] = None
location: Optional[str] = None
locale: Optional[str] = None
n_results: int = Field(default=10, description="Number of search results to return")
save_file: bool = Field(default=False, description="Flag to determine whether to save the results to a file")
def _run(
self,
**kwargs: Any,
) -> Any:
search_query = kwargs.get('search_query') or kwargs.get('query')
save_file = kwargs.get('save_file', self.save_file)
n_results = kwargs.get('n_results', self.n_results)
payload = json.dumps(
{
"q": search_query,
"num": self.n_results,
"num": n_results,
"gl": self.country,
"location": self.location,
"hl": self.locale,
}
)
headers = {
'X-API-KEY': os.environ['SERPER_API_KEY'],
'content-type': 'application/json'
@@ -42,7 +55,7 @@ class SerperDevTool(BaseTool):
response = requests.request("POST", self.search_url, headers=headers, data=payload)
results = response.json()
if 'organic' in results:
results = results['organic']
results = results['organic'][:self.n_results]
string = []
for result in results:
try:
@@ -53,9 +66,11 @@ class SerperDevTool(BaseTool):
"---"
]))
except KeyError:
next
continue
content = '\n'.join(string)
if save_file:
_save_results_to_file(content)
return f"\nSearch results: {content}\n"
else:
return results

View File

@@ -0,0 +1,117 @@
# Serply API Documentation
## Description
This tool is designed to perform web, news, scholar, and job searches for a specified query across the internet. It utilizes the [Serply.io](https://serply.io) API to fetch and display the most relevant search results based on the query provided by the user.
## Installation
To incorporate this tool into your project, follow the installation instructions below:
```shell
pip install 'crewai[tools]'
```
## Examples
## Web Search
The following example demonstrates how to initialize the tool and execute a web search with a given query:
```python
from crewai_tools import SerplyWebSearchTool
# Initialize the tool for internet searching capabilities
tool = SerplyWebSearchTool()
# increase search limits to 100 results
tool = SerplyWebSearchTool(limit=100)
# change results language (fr - French)
tool = SerplyWebSearchTool(hl="fr")
```
## News Search
The following example demonstrates how to initialize the tool and execute a news search with a given query:
```python
from crewai_tools import SerplyNewsSearchTool
# Initialize the tool for internet searching capabilities
tool = SerplyNewsSearchTool()
# change country news (JP - Japan)
tool = SerplyNewsSearchTool(proxy_location="JP")
```
## Scholar Search
The following example demonstrates how to initialize the tool and execute a scholar search for a given query:
```python
from crewai_tools import SerplyScholarSearchTool
# Initialize the tool for internet searching capabilities
tool = SerplyScholarSearchTool()
# change country news (GB - Great Britain)
tool = SerplyScholarSearchTool(proxy_location="GB")
```
## Job Search
The following example demonstrates how to initialize the tool and search for jobs in the USA:
```python
from crewai_tools import SerplyJobSearchTool
# Initialize the tool for internet searching capabilities
tool = SerplyJobSearchTool()
```
## Web Page To Markdown
The following example demonstrates how to initialize the tool and fetch a web page and convert it to markdown:
```python
from crewai_tools import SerplyWebpageToMarkdownTool
# Initialize the tool for internet searching capabilities
tool = SerplyWebpageToMarkdownTool()
# change country make request from (DE - Germany)
tool = SerplyWebpageToMarkdownTool(proxy_location="DE")
```
## Combining Multiple Tools
The following example demonstrates performing a Google search to find relevant articles and then converting those articles to markdown format for easier extraction of key points.
```python
from crewai import Agent
from crewai_tools import SerplyWebSearchTool, SerplyWebpageToMarkdownTool
search_tool = SerplyWebSearchTool()
convert_to_markdown = SerplyWebpageToMarkdownTool()
# Creating a senior researcher agent with memory and verbose mode
researcher = Agent(
role='Senior Researcher',
goal='Uncover groundbreaking technologies in {topic}',
verbose=True,
memory=True,
backstory=(
"Driven by curiosity, you're at the forefront of"
"innovation, eager to explore and share knowledge that could change"
"the world."
),
tools=[search_tool, convert_to_markdown],
allow_delegation=True
)
```
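A short continuation sketch showing how the researcher agent above might be exercised through a task and crew; the topic and task text are illustrative assumptions.
```python
from crewai import Task, Crew

research = Task(
    description="Find recent articles about {topic} and summarize their key points",
    expected_output="A bullet-point summary with links to the source articles",
    agent=researcher,
)

crew = Crew(agents=[researcher], tasks=[research])
crew.kickoff(inputs={"topic": "AI agents"})
```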
## Steps to Get Started
To effectively use the Serply tools, follow these steps:
1. **Package Installation**: Confirm that the `crewai[tools]` package is installed in your Python environment.
2. **API Key Acquisition**: Acquire a Serply API key by registering for a free account at [Serply.io](https://serply.io).
3. **Environment Configuration**: Store your obtained API key in an environment variable named `SERPLY_API_KEY` to facilitate its use by the tool, as shown in the sketch after this list.
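A minimal sketch of setting the key from Python; the value is a placeholder, and in practice you would normally export `SERPLY_API_KEY` in your shell instead of hard-coding it.
```python
import os

# Placeholder value; obtain a real key from Serply.io
os.environ["SERPLY_API_KEY"] = "your-serply-api-key"
```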
## Conclusion
By integrating the Serply tools into Python projects, users gain the ability to conduct real-time web, news, scholar, and job searches across the internet directly from their applications. By adhering to the setup and usage guidelines provided, incorporating these tools into projects is streamlined and straightforward.

View File

@@ -0,0 +1,75 @@
import os
import requests
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.rag.rag_tool import RagTool
class SerplyJobSearchToolSchema(BaseModel):
"""Input for Serply Scholar Search."""
search_query: str = Field(..., description="Mandatory search query you want to use to fetch jobs postings.")
class SerplyJobSearchTool(RagTool):
name: str = "Job Search"
description: str = "A tool to perform to perform a job search in the US with a search_query."
args_schema: Type[BaseModel] = SerplyJobSearchToolSchema
request_url: str = "https://api.serply.io/v1/job/search/"
proxy_location: Optional[str] = "US"
"""
proxy_location: (str): Where to get jobs, for country-specific results.
- Currently only supports US
"""
headers: Optional[dict] = {}
def __init__(
self,
**kwargs
):
super().__init__(**kwargs)
self.headers = {
"X-API-KEY": os.environ["SERPLY_API_KEY"],
"User-Agent": "crew-tools",
"X-Proxy-Location": self.proxy_location
}
def _run(
self,
**kwargs: Any,
) -> Any:
query_payload = {}
if "query" in kwargs:
query_payload["q"] = kwargs["query"]
elif "search_query" in kwargs:
query_payload["q"] = kwargs["search_query"]
# build the url
url = f"{self.request_url}{urlencode(query_payload)}"
response = requests.request("GET", url, headers=self.headers)
jobs = response.json().get("jobs", "")
if not jobs:
return ""
string = []
for job in jobs:
try:
string.append('\n'.join([
f"Position: {job['position']}",
f"Employer: {job['employer']}",
f"Location: {job['location']}",
f"Link: {job['link']}",
f"""Highest: {', '.join([h for h in job['highlights']])}""",
f"Is Remote: {job['is_remote']}",
f"Is Hybrid: {job['is_remote']}",
"---"
]))
except KeyError:
continue
content = '\n'.join(string)
return f"\nSearch results: {content}\n"

View File

@@ -0,0 +1,81 @@
import os
import requests
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class SerplyNewsSearchToolSchema(BaseModel):
"""Input for Serply News Search."""
search_query: str = Field(..., description="Mandatory search query you want to use to fetch news articles")
class SerplyNewsSearchTool(BaseTool):
name: str = "News Search"
description: str = "A tool to perform News article search with a search_query."
args_schema: Type[BaseModel] = SerplyNewsSearchToolSchema
search_url: str = "https://api.serply.io/v1/news/"
proxy_location: Optional[str] = "US"
headers: Optional[dict] = {}
limit: Optional[int] = 10
def __init__(
self,
limit: Optional[int] = 10,
proxy_location: Optional[str] = "US",
**kwargs
):
"""
param: limit (int): The maximum number of results to return [10-100, defaults to 10]
proxy_location: (str): Where to get news, for country-specific results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
"""
super().__init__(**kwargs)
self.limit = limit
self.proxy_location = proxy_location
self.headers = {
"X-API-KEY": os.environ["SERPLY_API_KEY"],
"User-Agent": "crew-tools",
"X-Proxy-Location": proxy_location
}
def _run(
self,
**kwargs: Any,
) -> Any:
# build query parameters
query_payload = {}
if "query" in kwargs:
query_payload["q"] = kwargs["query"]
elif "search_query" in kwargs:
query_payload["q"] = kwargs["search_query"]
# build the url
url = f"{self.search_url}{urlencode(query_payload)}"
response = requests.request("GET", url, headers=self.headers)
results = response.json()
if "entries" in results:
results = results['entries']
string = []
for result in results[:self.limit]:
try:
# follow url
r = requests.get(result['link'])
final_link = r.history[-1].headers['Location']
string.append('\n'.join([
f"Title: {result['title']}",
f"Link: {final_link}",
f"Source: {result['source']['title']}",
f"Published: {result['published']}",
"---"
]))
except KeyError:
continue
content = '\n'.join(string)
return f"\nSearch results: {content}\n"
else:
return results

View File

@@ -0,0 +1,86 @@
import os
import requests
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class SerplyScholarSearchToolSchema(BaseModel):
"""Input for Serply Scholar Search."""
search_query: str = Field(..., description="Mandatory search query you want to use to fetch scholarly literature")
class SerplyScholarSearchTool(BaseTool):
name: str = "Scholar Search"
description: str = "A tool to perform News article search with a search_query."
args_schema: Type[BaseModel] = SerplyScholarSearchToolSchema
search_url: str = "https://api.serply.io/v1/scholar/"
hl: Optional[str] = "us"
proxy_location: Optional[str] = "US"
headers: Optional[dict] = {}
def __init__(
self,
hl: str = "us",
proxy_location: Optional[str] = "US",
**kwargs
):
"""
param: hl (str): host Language code to display results in
(reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
proxy_location: (str): Where to get scholarly results, for country-specific results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
"""
super().__init__(**kwargs)
self.hl = hl
self.proxy_location = proxy_location
self.headers = {
"X-API-KEY": os.environ["SERPLY_API_KEY"],
"User-Agent": "crew-tools",
"X-Proxy-Location": proxy_location
}
def _run(
self,
**kwargs: Any,
) -> Any:
query_payload = {
"hl": self.hl
}
if "query" in kwargs:
query_payload["q"] = kwargs["query"]
elif "search_query" in kwargs:
query_payload["q"] = kwargs["search_query"]
# build the url
url = f"{self.search_url}{urlencode(query_payload)}"
response = requests.request("GET", url, headers=self.headers)
articles = response.json().get("articles", "")
if not articles:
return ""
string = []
for article in articles:
try:
if "doc" in article:
link = article['doc']['link']
else:
link = article['link']
authors = [author['name'] for author in article['author']['authors']]
string.append('\n'.join([
f"Title: {article['title']}",
f"Link: {link}",
f"Description: {article['description']}",
f"Cite: {article['cite']}",
f"Authors: {', '.join(authors)}",
"---"
]))
except KeyError:
continue
content = '\n'.join(string)
return f"\nSearch results: {content}\n"

View File

@@ -0,0 +1,93 @@
import os
import requests
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class SerplyWebSearchToolSchema(BaseModel):
"""Input for Serply Web Search."""
search_query: str = Field(..., description="Mandatory search query you want to use to Google search")
class SerplyWebSearchTool(BaseTool):
name: str = "Google Search"
description: str = "A tool to perform Google search with a search_query."
args_schema: Type[BaseModel] = SerplyWebSearchToolSchema
search_url: str = "https://api.serply.io/v1/search/"
hl: Optional[str] = "us"
limit: Optional[int] = 10
device_type: Optional[str] = "desktop"
proxy_location: Optional[str] = "US"
query_payload: Optional[dict] = {}
headers: Optional[dict] = {}
def __init__(
self,
hl: str = "us",
limit: int = 10,
device_type: str = "desktop",
proxy_location: str = "US",
**kwargs
):
"""
param: query (str): The query to search for
param: hl (str): host Language code to display results in
(reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
param: limit (int): The maximum number of results to return [10-100, defaults to 10]
param: device_type (str): desktop/mobile results (defaults to desktop)
proxy_location: (str): Where to perform the search, specifically for local/regional results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
"""
super().__init__(**kwargs)
self.limit = limit
self.device_type = device_type
self.proxy_location = proxy_location
# build query parameters
self.query_payload = {
"num": limit,
"gl": proxy_location.upper(),
"hl": hl.lower()
}
self.headers = {
"X-API-KEY": os.environ["SERPLY_API_KEY"],
"X-User-Agent": device_type,
"User-Agent": "crew-tools",
"X-Proxy-Location": proxy_location
}
def _run(
self,
**kwargs: Any,
) -> Any:
if "query" in kwargs:
self.query_payload["q"] = kwargs["query"]
elif "search_query" in kwargs:
self.query_payload["q"] = kwargs["search_query"]
# build the url
url = f"{self.search_url}{urlencode(self.query_payload)}"
response = requests.request("GET", url, headers=self.headers)
results = response.json()
if "results" in results:
results = results['results']
string = []
for result in results:
try:
string.append('\n'.join([
f"Title: {result['title']}",
f"Link: {result['link']}",
f"Description: {result['description'].strip()}",
"---"
]))
except KeyError:
continue
content = '\n'.join(string)
return f"\nSearch results: {content}\n"
else:
return results

View File

@@ -0,0 +1,48 @@
import os
import requests
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.rag.rag_tool import RagTool
class SerplyWebpageToMarkdownToolSchema(BaseModel):
"""Input for Serply Scholar Search."""
url: str = Field(..., description="Mandatory url you want to use to fetch and convert to markdown")
class SerplyWebpageToMarkdownTool(RagTool):
name: str = "Webpage to Markdown"
description: str = "A tool to perform convert a webpage to markdown to make it easier for LLMs to understand"
args_schema: Type[BaseModel] = SerplyWebpageToMarkdownToolSchema
request_url: str = "https://api.serply.io/v1/request"
proxy_location: Optional[str] = "US"
headers: Optional[dict] = {}
def __init__(
self,
proxy_location: Optional[str] = "US",
**kwargs
):
"""
proxy_location: (str): Where to make the request from, for country-specific results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
"""
super().__init__(**kwargs)
self.proxy_location = proxy_location
self.headers = {
"X-API-KEY": os.environ["SERPLY_API_KEY"],
"User-Agent": "crew-tools",
"X-Proxy-Location": proxy_location
}
def _run(
self,
**kwargs: Any,
) -> Any:
data = {
"url": kwargs["url"],
"method": "GET",
"response_type": "markdown"
}
response = requests.request("POST", self.request_url, headers=self.headers, json=data)
return response.text

View File

@@ -57,4 +57,4 @@ class TXTSearchTool(RagTool):
search_query: str,
**kwargs: Any,
) -> Any:
return super()._run(query=search_query)
return super()._run(query=search_query, **kwargs)

View File

@@ -57,4 +57,4 @@ class WebsiteSearchTool(RagTool):
search_query: str,
**kwargs: Any,
) -> Any:
return super()._run(query=search_query)
return super()._run(query=search_query, **kwargs)

View File

@@ -57,4 +57,4 @@ class XMLSearchTool(RagTool):
search_query: str,
**kwargs: Any,
) -> Any:
return super()._run(query=search_query)
return super()._run(query=search_query, **kwargs)

View File

@@ -60,4 +60,4 @@ class YoutubeChannelSearchTool(RagTool):
search_query: str,
**kwargs: Any,
) -> Any:
return super()._run(query=search_query)
return super()._run(query=search_query, **kwargs)

View File

@@ -57,4 +57,4 @@ class YoutubeVideoSearchTool(RagTool):
search_query: str,
**kwargs: Any,
) -> Any:
return super()._run(query=search_query)
return super()._run(query=search_query, **kwargs)

View File

@@ -0,0 +1,38 @@
import unittest
from unittest.mock import patch
from crewai_tools.tools.code_interpreter_tool.code_interpreter_tool import (
CodeInterpreterTool,
)
class TestCodeInterpreterTool(unittest.TestCase):
@patch("crewai_tools.tools.code_interpreter_tool.code_interpreter_tool.docker")
def test_run_code_in_docker(self, docker_mock):
tool = CodeInterpreterTool()
code = "print('Hello, World!')"
libraries_used = "numpy,pandas"
expected_output = "Hello, World!\n"
docker_mock.from_env().containers.run().exec_run().exit_code = 0
docker_mock.from_env().containers.run().exec_run().output = (
expected_output.encode()
)
result = tool.run_code_in_docker(code, libraries_used)
self.assertEqual(result, expected_output)
@patch("crewai_tools.tools.code_interpreter_tool.code_interpreter_tool.docker")
def test_run_code_in_docker_with_error(self, docker_mock):
tool = CodeInterpreterTool()
code = "print(1/0)"
libraries_used = "numpy,pandas"
expected_output = "Something went wrong while running the code: \nZeroDivisionError: division by zero\n"
docker_mock.from_env().containers.run().exec_run().exit_code = 1
docker_mock.from_env().containers.run().exec_run().output = (
b"ZeroDivisionError: division by zero\n"
)
result = tool.run_code_in_docker(code, libraries_used)
self.assertEqual(result, expected_output)