Mirror of https://github.com/crewAIInc/crewAI.git, synced 2026-01-09 16:18:30 +00:00
Merge branch 'main' into feature/add-mysql-tool
@@ -1,27 +1,42 @@
from .tools.base_tool import BaseTool, Tool, tool
from .tools import (
    BrowserbaseLoadTool,
    CodeDocsSearchTool,
    CSVSearchTool,
    DirectorySearchTool,
    DOCXSearchTool,
    DirectoryReadTool,
    EXASearchTool,
    FileReadTool,
    GithubSearchTool,
    SerperDevTool,
    TXTSearchTool,
    JSONSearchTool,
    MDXSearchTool,
    PDFSearchTool,
    PGSearchTool,
    RagTool,
    ScrapeElementFromWebsiteTool,
    ScrapeWebsiteTool,
    SeleniumScrapingTool,
    WebsiteSearchTool,
    XMLSearchTool,
    YoutubeChannelSearchTool,
    YoutubeVideoSearchTool,
    BrowserbaseLoadTool,
    CodeDocsSearchTool,
    CodeInterpreterTool,
    ComposioTool,
    CSVSearchTool,
    DirectoryReadTool,
    DirectorySearchTool,
    DOCXSearchTool,
    EXASearchTool,
    FileReadTool,
    FirecrawlCrawlWebsiteTool,
    FirecrawlScrapeWebsiteTool,
    FirecrawlSearchTool,
    GithubSearchTool,
    JSONSearchTool,
    LlamaIndexTool,
    MDXSearchTool,
    MultiOnTool,
    NL2SQLTool,
    PDFSearchTool,
    PGSearchTool,
    RagTool,
    ScrapeElementFromWebsiteTool,
    ScrapeWebsiteTool,
    ScrapflyScrapeWebsiteTool,
    SeleniumScrapingTool,
    SerperDevTool,
    SerplyJobSearchTool,
    SerplyNewsSearchTool,
    SerplyScholarSearchTool,
    SerplyWebpageToMarkdownTool,
    SerplyWebSearchTool,
    TXTSearchTool,
    VisionTool,
    WebsiteSearchTool,
    XMLSearchTool,
    YoutubeChannelSearchTool,
    YoutubeVideoSearchTool,
    MySQLSearchTool
)
from .tools.base_tool import BaseTool, Tool, tool
32
src/crewai_tools/adapters/pdf_embedchain_adapter.py
Normal file
@@ -0,0 +1,32 @@
from typing import Any, Optional

from embedchain import App

from crewai_tools.tools.rag.rag_tool import Adapter


class PDFEmbedchainAdapter(Adapter):
    embedchain_app: App
    summarize: bool = False
    src: Optional[str] = None

    def query(self, question: str) -> str:
        where = (
            {"app_id": self.embedchain_app.config.id, "source": self.src}
            if self.src
            else None
        )
        result, sources = self.embedchain_app.query(
            question, citations=True, dry_run=(not self.summarize), where=where
        )
        if self.summarize:
            return result
        return "\n\n".join([source[0] for source in sources])

    def add(
        self,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        self.src = args[0] if args else None
        self.embedchain_app.add(*args, **kwargs)
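For context, a minimal sketch of how this adapter could be driven directly; the `App()` configuration and the PDF path are hypothetical, and inside the package the adapter is normally constructed by `PDFSearchTool`, as shown further down in this diff:

```python
from embedchain import App
from crewai_tools.adapters.pdf_embedchain_adapter import PDFEmbedchainAdapter

# Hypothetical example: index one PDF and query it through the adapter.
adapter = PDFEmbedchainAdapter(embedchain_app=App(), summarize=False)
adapter.add("example.pdf")  # stores the source path in `src`
print(adapter.query("What is the document about?"))  # returns the matched sources
```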
@@ -1,24 +1,50 @@
from .browserbase_load_tool.browserbase_load_tool import BrowserbaseLoadTool
from .code_docs_search_tool.code_docs_search_tool import CodeDocsSearchTool
from .code_interpreter_tool.code_interpreter_tool import CodeInterpreterTool
from .composio_tool.composio_tool import ComposioTool
from .csv_search_tool.csv_search_tool import CSVSearchTool
from .directory_search_tool.directory_search_tool import DirectorySearchTool
from .directory_read_tool.directory_read_tool import DirectoryReadTool
from .directory_search_tool.directory_search_tool import DirectorySearchTool
from .docx_search_tool.docx_search_tool import DOCXSearchTool
from .exa_tools.exa_search_tool import EXASearchTool
from .file_read_tool.file_read_tool import FileReadTool
from .firecrawl_crawl_website_tool.firecrawl_crawl_website_tool import (
    FirecrawlCrawlWebsiteTool,
)
from .firecrawl_scrape_website_tool.firecrawl_scrape_website_tool import (
    FirecrawlScrapeWebsiteTool,
)
from .firecrawl_search_tool.firecrawl_search_tool import FirecrawlSearchTool
from .github_search_tool.github_search_tool import GithubSearchTool
from .serper_dev_tool.serper_dev_tool import SerperDevTool
from .txt_search_tool.txt_search_tool import TXTSearchTool
from .json_search_tool.json_search_tool import JSONSearchTool
from .llamaindex_tool.llamaindex_tool import LlamaIndexTool
from .mdx_seach_tool.mdx_search_tool import MDXSearchTool
from .multion_tool.multion_tool import MultiOnTool
from .nl2sql.nl2sql_tool import NL2SQLTool
from .pdf_search_tool.pdf_search_tool import PDFSearchTool
from .pg_seach_tool.pg_search_tool import PGSearchTool
from .rag.rag_tool import RagTool
from .scrape_element_from_website.scrape_element_from_website import ScrapeElementFromWebsiteTool
from .scrape_element_from_website.scrape_element_from_website import (
    ScrapeElementFromWebsiteTool,
)
from .scrape_website_tool.scrape_website_tool import ScrapeWebsiteTool
from .scrapfly_scrape_website_tool.scrapfly_scrape_website_tool import (
    ScrapflyScrapeWebsiteTool,
)
from .selenium_scraping_tool.selenium_scraping_tool import SeleniumScrapingTool
from .serper_dev_tool.serper_dev_tool import SerperDevTool
from .serply_api_tool.serply_job_search_tool import SerplyJobSearchTool
from .serply_api_tool.serply_news_search_tool import SerplyNewsSearchTool
from .serply_api_tool.serply_scholar_search_tool import SerplyScholarSearchTool
from .serply_api_tool.serply_web_search_tool import SerplyWebSearchTool
from .serply_api_tool.serply_webpage_to_markdown_tool import SerplyWebpageToMarkdownTool
from .spider_tool.spider_tool import SpiderTool
from .txt_search_tool.txt_search_tool import TXTSearchTool
from .vision_tool.vision_tool import VisionTool
from .website_search.website_search_tool import WebsiteSearchTool
from .xml_search_tool.xml_search_tool import XMLSearchTool
from .youtube_channel_search_tool.youtube_channel_search_tool import YoutubeChannelSearchTool
from .youtube_channel_search_tool.youtube_channel_search_tool import (
    YoutubeChannelSearchTool,
)
from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool
from .mysql_search_tool.mysql_search_tool import MySQLSearchTool
@@ -22,6 +22,8 @@ class BaseTool(BaseModel, ABC):
    """Flag to check if the description has been updated."""
    cache_function: Optional[Callable] = lambda _args, _result: True
    """Function that will be used to determine if the tool should be cached, should return a boolean. If None, the tool will be cached."""
    result_as_answer: bool = False
    """Flag to check if the tool should be the final agent answer."""

    @validator("args_schema", always=True, pre=True)
    def _default_args_schema(cls, v: Type[V1BaseModel]) -> Type[V1BaseModel]:
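A sketch of how a caller might use these two new fields; the `@tool` decorator usage and the caching rule below are illustrative, not taken from this diff:

```python
from crewai_tools import tool

@tool("Square a number")
def square(n: int) -> int:
    """Returns n squared."""
    return n * n

# Hypothetical caching rule: only cache results for small outputs.
square.cache_function = lambda args, result: result < 100
# Hand the tool's raw output back as the agent's final answer.
square.result_as_answer = True
```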
@@ -85,12 +87,15 @@ class BaseTool(BaseModel, ABC):

    def _generate_description(self):
        args = []
        args_description = []
        for arg, attribute in self.args_schema.schema()["properties"].items():
            if "type" in attribute:
                args.append(f"{arg}: '{attribute['type']}'")
            if "description" in attribute:
                args_description.append(f"{arg}: '{attribute['description']}'")

        description = self.description.replace("\n", " ")
        self.description = f"{self.name}({', '.join(args)}) - {description}"
        self.description = f"{self.name}({', '.join(args)}) - {description} {', '.join(args_description)}"


class Tool(BaseTool):
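To illustrate the change above with a made-up schema (names and strings here are only for illustration), the rebuilt description now also appends the per-argument descriptions:

```python
from pydantic.v1 import BaseModel, Field

class SearchSchema(BaseModel):
    query: str = Field(..., description="the search term")

# With name="Search" and description="Searches the web", the regenerated
# description string roughly becomes:
#   Search(query: 'string') - Searches the web query: 'the search term'
```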
@@ -2,11 +2,17 @@

## Description

[Browserbase](https://browserbase.com) is a serverless platform for running headless browsers; it offers advanced debugging, session recordings, stealth mode, integrated proxies and captcha solving.
[Browserbase](https://browserbase.com) is a developer platform to reliably run, manage, and monitor headless browsers.

Power your AI data retrievals with:
- [Serverless Infrastructure](https://docs.browserbase.com/under-the-hood) providing reliable browsers to extract data from complex UIs
- [Stealth Mode](https://docs.browserbase.com/features/stealth-mode) with included fingerprinting tactics and automatic captcha solving
- [Session Debugger](https://docs.browserbase.com/features/sessions) to inspect your Browser Session with networks timeline and logs
- [Live Debug](https://docs.browserbase.com/guides/session-debug-connection/browser-remote-control) to quickly debug your automation

## Installation

- Get an API key from [browserbase.com](https://browserbase.com) and set it in environment variables (`BROWSERBASE_API_KEY`).
- Get an API key and Project ID from [browserbase.com](https://browserbase.com) and set them in environment variables (`BROWSERBASE_API_KEY`, `BROWSERBASE_PROJECT_ID`).
- Install the [Browserbase SDK](http://github.com/browserbase/python-sdk) along with the `crewai[tools]` package:

```

@@ -25,5 +31,8 @@ tool = BrowserbaseLoadTool()

## Arguments

- `api_key`: Optional. Specifies the Browserbase API key. Default is the `BROWSERBASE_API_KEY` environment variable.
- `text_content`: Optional. Load pages as readable text. Default is `False`.
- `api_key` Optional. Browserbase API key. Default is the `BROWSERBASE_API_KEY` env variable.
- `project_id` Optional. Browserbase Project ID. Default is the `BROWSERBASE_PROJECT_ID` env variable.
- `text_content` Retrieve only text content. Default is `False`.
- `session_id` Optional. Provide an existing Session ID.
- `proxy` Optional. Enable/Disable Proxies.
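A small usage sketch with the new constructor arguments; the key, project ID and URL values are placeholders:

```python
from crewai_tools import BrowserbaseLoadTool

# Placeholders: supply your own key/project, or rely on the
# BROWSERBASE_API_KEY / BROWSERBASE_PROJECT_ID environment variables.
tool = BrowserbaseLoadTool(
    api_key="<your-api-key>",
    project_id="<your-project-id>",
    text_content=True,  # return readable text only
    proxy=True,         # route the session through Browserbase proxies
)
print(tool.run(url="https://example.com"))
```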
@@ -10,20 +10,35 @@ class BrowserbaseLoadTool(BaseTool):
    description: str = "Load webpages url in a headless browser using Browserbase and return the contents"
    args_schema: Type[BaseModel] = BrowserbaseLoadToolSchema
    api_key: Optional[str] = None
    project_id: Optional[str] = None
    text_content: Optional[bool] = False
    session_id: Optional[str] = None
    proxy: Optional[bool] = None
    browserbase: Optional[Any] = None

    def __init__(self, api_key: Optional[str] = None, text_content: Optional[bool] = False, **kwargs):
    def __init__(
        self,
        api_key: Optional[str] = None,
        project_id: Optional[str] = None,
        text_content: Optional[bool] = False,
        session_id: Optional[str] = None,
        proxy: Optional[bool] = None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        try:
            from browserbase import Browserbase  # type: ignore
            from browserbase import Browserbase  # type: ignore
        except ImportError:
            raise ImportError(
                "`browserbase` package not found, please run `pip install browserbase`"
            )
            raise ImportError(
                "`browserbase` package not found, please run `pip install browserbase`"
            )

        self.browserbase = Browserbase(api_key=api_key)
        self.browserbase = Browserbase(api_key, project_id)
        self.text_content = text_content
        self.session_id = session_id
        self.proxy = proxy

    def _run(self, url: str):
        return self.browserbase.load_url(url, text_content=self.text_content)
        return self.browserbase.load_url(
            url, self.text_content, self.session_id, self.proxy
        )
@@ -57,4 +57,4 @@ class CodeDocsSearchTool(RagTool):
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
        return super()._run(query=search_query, **kwargs)
14
src/crewai_tools/tools/code_interpreter_tool/Dockerfile
Normal file
@@ -0,0 +1,14 @@
FROM python:3.11-slim

# Install common utilities
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    wget \
    software-properties-common

# Clean up
RUN apt-get clean && rm -rf /var/lib/apt/lists/*

# Set the working directory
WORKDIR /workspace
29
src/crewai_tools/tools/code_interpreter_tool/README.md
Normal file
@@ -0,0 +1,29 @@
# CodeInterpreterTool

## Description
This tool gives the Agent the ability to run Python 3 code that the Agent itself has generated. The code is executed in a sandboxed Docker container, so it is safe to run any code.

It is incredibly useful since it allows the Agent to generate code, run it in the same environment, get the result, and use it to make decisions.

## Requirements

- Docker

## Installation
Install the crewai_tools package:
```shell
pip install 'crewai[tools]'
```

## Example

Remember that when using this tool, the code must be generated by the Agent itself and must be valid Python 3. The first run will take some time because the Docker image needs to be built.

```python
from crewai_tools import CodeInterpreterTool

Agent(
    ...
    tools=[CodeInterpreterTool()],
)
```
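Outside of an agent, the same tool can be exercised directly, which is a handy way to confirm Docker is set up; the snippet and library name below are only an illustration:

```python
from crewai_tools import CodeInterpreterTool

interpreter = CodeInterpreterTool()
# Builds the `code-interpreter` image on first use, then runs the snippet
# inside a disposable container (see code_interpreter_tool.py below).
output = interpreter.run_code_in_docker(
    code="import numpy as np; print(np.arange(3).sum())",
    libraries_used=["numpy"],
)
print(output)  # -> "3"
```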
@@ -0,0 +1,94 @@
|
||||
import importlib.util
|
||||
import os
|
||||
from typing import List, Optional, Type
|
||||
|
||||
import docker
|
||||
from crewai_tools.tools.base_tool import BaseTool
|
||||
from pydantic.v1 import BaseModel, Field
|
||||
|
||||
|
||||
class CodeInterpreterSchema(BaseModel):
|
||||
"""Input for CodeInterpreterTool."""
|
||||
|
||||
code: str = Field(
|
||||
...,
|
||||
description="Python3 code used to be interpreted in the Docker container. ALWAYS PRINT the final result and the output of the code",
|
||||
)
|
||||
|
||||
libraries_used: List[str] = Field(
|
||||
...,
|
||||
description="List of libraries used in the code with proper installing names separated by commas. Example: numpy,pandas,beautifulsoup4",
|
||||
)
|
||||
|
||||
|
||||
class CodeInterpreterTool(BaseTool):
|
||||
name: str = "Code Interpreter"
|
||||
description: str = "Interprets Python3 code strings with a final print statement."
|
||||
args_schema: Type[BaseModel] = CodeInterpreterSchema
|
||||
code: Optional[str] = None
|
||||
|
||||
@staticmethod
|
||||
def _get_installed_package_path():
|
||||
spec = importlib.util.find_spec("crewai_tools")
|
||||
return os.path.dirname(spec.origin)
|
||||
|
||||
def _verify_docker_image(self) -> None:
|
||||
"""
|
||||
Verify if the Docker image is available
|
||||
"""
|
||||
image_tag = "code-interpreter:latest"
|
||||
client = docker.from_env()
|
||||
|
||||
try:
|
||||
client.images.get(image_tag)
|
||||
|
||||
except docker.errors.ImageNotFound:
|
||||
package_path = self._get_installed_package_path()
|
||||
dockerfile_path = os.path.join(package_path, "tools/code_interpreter_tool")
|
||||
if not os.path.exists(dockerfile_path):
|
||||
raise FileNotFoundError(f"Dockerfile not found in {dockerfile_path}")
|
||||
|
||||
client.images.build(
|
||||
path=dockerfile_path,
|
||||
tag=image_tag,
|
||||
rm=True,
|
||||
)
|
||||
|
||||
def _run(self, **kwargs) -> str:
|
||||
code = kwargs.get("code", self.code)
|
||||
libraries_used = kwargs.get("libraries_used", [])
|
||||
return self.run_code_in_docker(code, libraries_used)
|
||||
|
||||
def _install_libraries(
|
||||
self, container: docker.models.containers.Container, libraries: List[str]
|
||||
) -> None:
|
||||
"""
|
||||
Install missing libraries in the Docker container
|
||||
"""
|
||||
for library in libraries:
|
||||
container.exec_run(f"pip install {library}")
|
||||
|
||||
def _init_docker_container(self) -> docker.models.containers.Container:
|
||||
client = docker.from_env()
|
||||
return client.containers.run(
|
||||
"code-interpreter",
|
||||
detach=True,
|
||||
tty=True,
|
||||
working_dir="/workspace",
|
||||
name="code-interpreter",
|
||||
)
|
||||
|
||||
def run_code_in_docker(self, code: str, libraries_used: List[str]) -> str:
|
||||
self._verify_docker_image()
|
||||
container = self._init_docker_container()
|
||||
self._install_libraries(container, libraries_used)
|
||||
|
||||
cmd_to_run = f'python3 -c "{code}"'
|
||||
exec_result = container.exec_run(cmd_to_run)
|
||||
|
||||
container.stop()
|
||||
container.remove()
|
||||
|
||||
if exec_result.exit_code != 0:
|
||||
return f"Something went wrong while running the code: \n{exec_result.output.decode('utf-8')}"
|
||||
return exec_result.output.decode("utf-8")
|
||||
72
src/crewai_tools/tools/composio_tool/README.md
Normal file
@@ -0,0 +1,72 @@
# ComposioTool Documentation

## Description

This tool is a wrapper around the Composio toolset and gives your agent access to a wide variety of tools from the Composio SDK.

## Installation

To incorporate this tool into your project, follow the installation instructions below:

```shell
pip install composio-core
pip install 'crewai[tools]'
```

After the installation is complete, either run `composio login` or export your Composio API key as `COMPOSIO_API_KEY`.

## Example

The following example demonstrates how to initialize the tool and execute a GitHub action:

1. Initialize the toolset

```python
from composio import Action, App
from crewai_tools import ComposioTool
from crewai import Agent, Task


tools = [ComposioTool.from_action(action=Action.GITHUB_ACTIVITY_STAR_REPO_FOR_AUTHENTICATED_USER)]
```

If you don't know which action you want to use, use `from_app` with the `tags` filter to get relevant actions:

```python
tools = ComposioTool.from_app(App.GITHUB, tags=["important"])
```

or use `use_case` to search for relevant actions:

```python
tools = ComposioTool.from_app(App.GITHUB, use_case="Star a github repository")
```

2. Define the agent

```python
crewai_agent = Agent(
    role="Github Agent",
    goal="You take action on Github using Github APIs",
    backstory=(
        "You are an AI agent that is responsible for taking actions on Github "
        "on users' behalf. You need to take action on Github using Github APIs"
    ),
    verbose=True,
    tools=tools,
)
```

3. Execute the task

```python
task = Task(
    description="Star a repo ComposioHQ/composio on GitHub",
    agent=crewai_agent,
    expected_output="if the star happened",
)

task.execute()
```

* A more detailed list of tools can be found [here](https://app.composio.dev)
122
src/crewai_tools/tools/composio_tool/composio_tool.py
Normal file
@@ -0,0 +1,122 @@
|
||||
"""
|
||||
Composio tools wrapper.
|
||||
"""
|
||||
|
||||
import typing as t
|
||||
|
||||
import typing_extensions as te
|
||||
|
||||
from crewai_tools.tools.base_tool import BaseTool
|
||||
|
||||
|
||||
class ComposioTool(BaseTool):
|
||||
"""Wrapper for composio tools."""
|
||||
|
||||
composio_action: t.Callable
|
||||
|
||||
def _run(self, *args: t.Any, **kwargs: t.Any) -> t.Any:
|
||||
"""Run the composio action with given arguments."""
|
||||
return self.composio_action(*args, **kwargs)
|
||||
|
||||
@staticmethod
|
||||
def _check_connected_account(tool: t.Any, toolset: t.Any) -> None:
|
||||
"""Check if connected account is required and if required it exists or not."""
|
||||
from composio import Action
|
||||
from composio.client.collections import ConnectedAccountModel
|
||||
|
||||
tool = t.cast(Action, tool)
|
||||
if tool.no_auth:
|
||||
return
|
||||
|
||||
connections = t.cast(
|
||||
t.List[ConnectedAccountModel],
|
||||
toolset.client.connected_accounts.get(),
|
||||
)
|
||||
if tool.app not in [connection.appUniqueId for connection in connections]:
|
||||
raise RuntimeError(
|
||||
f"No connected account found for app `{tool.app}`; "
|
||||
f"Run `composio add {tool.app}` to fix this"
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_action(
|
||||
cls,
|
||||
action: t.Any,
|
||||
**kwargs: t.Any,
|
||||
) -> te.Self:
|
||||
"""Wrap a composio tool as crewAI tool."""
|
||||
|
||||
from composio import Action, ComposioToolSet
|
||||
from composio.constants import DEFAULT_ENTITY_ID
|
||||
from composio.utils.shared import json_schema_to_model
|
||||
|
||||
toolset = ComposioToolSet()
|
||||
if not isinstance(action, Action):
|
||||
action = Action(action)
|
||||
|
||||
action = t.cast(Action, action)
|
||||
cls._check_connected_account(
|
||||
tool=action,
|
||||
toolset=toolset,
|
||||
)
|
||||
|
||||
(action_schema,) = toolset.get_action_schemas(actions=[action])
|
||||
schema = action_schema.model_dump(exclude_none=True)
|
||||
entity_id = kwargs.pop("entity_id", DEFAULT_ENTITY_ID)
|
||||
|
||||
def function(**kwargs: t.Any) -> t.Dict:
|
||||
"""Wrapper function for composio action."""
|
||||
return toolset.execute_action(
|
||||
action=Action(schema["name"]),
|
||||
params=kwargs,
|
||||
entity_id=entity_id,
|
||||
)
|
||||
|
||||
function.__name__ = schema["name"]
|
||||
function.__doc__ = schema["description"]
|
||||
|
||||
return cls(
|
||||
name=schema["name"],
|
||||
description=schema["description"],
|
||||
args_schema=json_schema_to_model(
|
||||
action_schema.parameters.model_dump(
|
||||
exclude_none=True,
|
||||
)
|
||||
),
|
||||
composio_action=function,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_app(
|
||||
cls,
|
||||
*apps: t.Any,
|
||||
tags: t.Optional[t.List[str]] = None,
|
||||
use_case: t.Optional[str] = None,
|
||||
**kwargs: t.Any,
|
||||
) -> t.List[te.Self]:
|
||||
"""Create toolset from an app."""
|
||||
if len(apps) == 0:
|
||||
raise ValueError("You need to provide at least one app name")
|
||||
|
||||
if use_case is None and tags is None:
|
||||
raise ValueError("Both `use_case` and `tags` cannot be `None`")
|
||||
|
||||
if use_case is not None and tags is not None:
|
||||
raise ValueError(
|
||||
"Cannot use both `use_case` and `tags` to filter the actions"
|
||||
)
|
||||
|
||||
from composio import ComposioToolSet
|
||||
|
||||
toolset = ComposioToolSet()
|
||||
if use_case is not None:
|
||||
return [
|
||||
cls.from_action(action=action, **kwargs)
|
||||
for action in toolset.find_actions_by_use_case(*apps, use_case=use_case)
|
||||
]
|
||||
|
||||
return [
|
||||
cls.from_action(action=action, **kwargs)
|
||||
for action in toolset.find_actions_by_tags(*apps, tags=tags)
|
||||
]
|
||||
@@ -57,4 +57,4 @@ class CSVSearchTool(RagTool):
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
        return super()._run(query=search_query, **kwargs)
41
src/crewai_tools/tools/dalle_tool/README.MD
Normal file
@@ -0,0 +1,41 @@
# DALL-E Tool

## Description
This tool gives the Agent the ability to generate images using the DALL-E model, a transformer-based model that generates images from textual descriptions. It allows the Agent to generate images based on the text input provided by the user.

## Installation
Install the crewai_tools package:
```shell
pip install 'crewai[tools]'
```

## Example

Remember that when using this tool, the text must be generated by the Agent itself. The text must be a description of the image you want to generate.

```python
from crewai_tools import DallETool

Agent(
    ...
    tools=[DallETool()],
)
```

If needed, you can also tweak the parameters of the DALL-E model by passing them as arguments to the `DallETool` class. For example:

```python
from crewai_tools import DallETool

dalle_tool = DallETool(model="dall-e-3",
                       size="1024x1024",
                       quality="standard",
                       n=1)

Agent(
    ...
    tools=[dalle_tool]
)
```

The parameters are based on the `client.images.generate` method from the OpenAI API. For more information on the parameters, please refer to the [OpenAI API documentation](https://platform.openai.com/docs/guides/images/introduction?lang=python).
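Since the tool returns a JSON string (see `dalle_tool.py` below), a caller outside the agent loop could unpack it like this; the prompt text is only illustrative:

```python
import json

from crewai_tools import DallETool

dalle_tool = DallETool(model="dall-e-3", size="1024x1024", quality="standard", n=1)
result = json.loads(dalle_tool.run(image_description="A watercolor fox in a forest"))
print(result["image_url"], result["image_description"])
```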
48
src/crewai_tools/tools/dalle_tool/dalle_tool.py
Normal file
@@ -0,0 +1,48 @@
import json
from typing import Type

from crewai_tools.tools.base_tool import BaseTool
from openai import OpenAI
from pydantic.v1 import BaseModel


class ImagePromptSchema(BaseModel):
    """Input for Dall-E Tool."""

    image_description: str = "Description of the image to be generated by Dall-E."


class DallETool(BaseTool):
    name: str = "Dall-E Tool"
    description: str = "Generates images using OpenAI's Dall-E model."
    args_schema: Type[BaseModel] = ImagePromptSchema

    model: str = "dall-e-3"
    size: str = "1024x1024"
    quality: str = "standard"
    n: int = 1

    def _run(self, **kwargs) -> str:
        client = OpenAI()

        image_description = kwargs.get("image_description")

        if not image_description:
            return "Image description is required."

        response = client.images.generate(
            model=self.model,
            prompt=image_description,
            size=self.size,
            quality=self.quality,
            n=self.n,
        )

        image_data = json.dumps(
            {
                "image_url": response.data[0].url,
                "image_description": response.data[0].revised_prompt,
            }
        )

        return image_data
@@ -57,4 +57,4 @@ class DirectorySearchTool(RagTool):
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
        return super()._run(query=search_query, **kwargs)
@@ -63,4 +63,4 @@ class DOCXSearchTool(RagTool):
        docx = kwargs.get("docx")
        if docx is not None:
            self.add(docx)
        return super()._run(query=search_query)
        return super()._run(query=search_query, **kwargs)
@@ -0,0 +1,42 @@
# FirecrawlCrawlWebsiteTool

## Description

[Firecrawl](https://firecrawl.dev) is a platform for crawling and converting any website into clean markdown or structured data.

## Installation

- Get an API key from [firecrawl.dev](https://firecrawl.dev) and set it in environment variables (`FIRECRAWL_API_KEY`).
- Install the [Firecrawl SDK](https://github.com/mendableai/firecrawl) along with the `crewai[tools]` package:

```
pip install firecrawl-py 'crewai[tools]'
```

## Example

Utilize the FirecrawlCrawlWebsiteTool as follows to allow your agent to crawl websites:

```python
from crewai_tools import FirecrawlCrawlWebsiteTool

tool = FirecrawlCrawlWebsiteTool(url='firecrawl.dev')
```

## Arguments

- `api_key`: Optional. Specifies the Firecrawl API key. Default is the `FIRECRAWL_API_KEY` environment variable.
- `url`: The base URL to start crawling from.
- `page_options`: Optional.
  - `onlyMainContent`: Optional. Only return the main content of the page, excluding headers, navs, footers, etc.
  - `includeHtml`: Optional. Include the raw HTML content of the page. Will output an `html` key in the response.
- `crawler_options`: Optional. Options for controlling the crawling behavior.
  - `includes`: Optional. URL patterns to include in the crawl.
  - `exclude`: Optional. URL patterns to exclude from the crawl.
  - `generateImgAltText`: Optional. Generate alt text for images using LLMs (requires a paid plan).
  - `returnOnlyUrls`: Optional. If true, returns only the URLs as a list in the crawl status. Note: the response will be a list of URLs inside the data, not a list of documents.
  - `maxDepth`: Optional. Maximum depth to crawl. Depth 1 is the base URL, depth 2 includes the base URL and its direct children, and so on.
  - `mode`: Optional. The crawling mode to use. Fast mode crawls 4x faster on websites without a sitemap but may not be as accurate and shouldn't be used on heavily JavaScript-rendered websites.
  - `limit`: Optional. Maximum number of pages to crawl.
  - `timeout`: Optional. Timeout in milliseconds for the crawling operation.
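Building on the argument list above, a sketch of passing `crawler_options` and `page_options` through the tool; the option values are illustrative:

```python
from crewai_tools import FirecrawlCrawlWebsiteTool

tool = FirecrawlCrawlWebsiteTool(api_key="<FIRECRAWL_API_KEY>")
result = tool.run(
    url="https://firecrawl.dev",
    crawler_options={"maxDepth": 2, "limit": 10},  # illustrative values
    page_options={"onlyMainContent": True},
)
```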
@@ -0,0 +1,33 @@
from typing import Optional, Any, Type, Dict, List
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool


class FirecrawlCrawlWebsiteToolSchema(BaseModel):
    url: str = Field(description="Website URL")
    crawler_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for crawling")
    page_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for page")


class FirecrawlCrawlWebsiteTool(BaseTool):
    name: str = "Firecrawl web crawl tool"
    description: str = "Crawl webpages using Firecrawl and return the contents"
    args_schema: Type[BaseModel] = FirecrawlCrawlWebsiteToolSchema
    api_key: Optional[str] = None
    firecrawl: Optional[Any] = None

    def __init__(self, api_key: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        try:
            from firecrawl import FirecrawlApp  # type: ignore
        except ImportError:
            raise ImportError(
                "`firecrawl` package not found, please run `pip install firecrawl-py`"
            )

        self.firecrawl = FirecrawlApp(api_key=api_key)

    def _run(self, url: str, crawler_options: Optional[Dict[str, Any]] = None, page_options: Optional[Dict[str, Any]] = None):
        options = {
            "crawlerOptions": crawler_options,
            "pageOptions": page_options
        }
        return self.firecrawl.crawl_url(url, options)
@@ -0,0 +1,38 @@
# FirecrawlScrapeWebsiteTool

## Description

[Firecrawl](https://firecrawl.dev) is a platform for crawling and converting any website into clean markdown or structured data.

## Installation

- Get an API key from [firecrawl.dev](https://firecrawl.dev) and set it in environment variables (`FIRECRAWL_API_KEY`).
- Install the [Firecrawl SDK](https://github.com/mendableai/firecrawl) along with the `crewai[tools]` package:

```
pip install firecrawl-py 'crewai[tools]'
```

## Example

Utilize the FirecrawlScrapeWebsiteTool as follows to allow your agent to load websites:

```python
from crewai_tools import FirecrawlScrapeWebsiteTool

tool = FirecrawlScrapeWebsiteTool(url='firecrawl.dev')
```

## Arguments

- `api_key`: Optional. Specifies the Firecrawl API key. Default is the `FIRECRAWL_API_KEY` environment variable.
- `url`: The URL to scrape.
- `page_options`: Optional.
  - `onlyMainContent`: Optional. Only return the main content of the page, excluding headers, navs, footers, etc.
  - `includeHtml`: Optional. Include the raw HTML content of the page. Will output an `html` key in the response.
- `extractor_options`: Optional. Options for LLM-based extraction of structured information from the page content.
  - `mode`: The extraction mode to use; currently supports 'llm-extraction'.
  - `extractionPrompt`: Optional. A prompt describing what information to extract from the page.
  - `extractionSchema`: Optional. The schema for the data to be extracted.
- `timeout`: Optional. Timeout in milliseconds for the request.
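For the LLM-based extraction described above, the options can be forwarded like this; the prompt and timeout values are illustrative:

```python
from crewai_tools import FirecrawlScrapeWebsiteTool

tool = FirecrawlScrapeWebsiteTool(api_key="<FIRECRAWL_API_KEY>")
result = tool.run(
    url="https://firecrawl.dev",
    page_options={"onlyMainContent": True},
    extractor_options={
        "mode": "llm-extraction",
        "extractionPrompt": "Extract the product name and pricing tiers",  # illustrative
    },
    timeout=30000,  # milliseconds
)
```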
@@ -0,0 +1,35 @@
from typing import Optional, Any, Type, Dict
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool


class FirecrawlScrapeWebsiteToolSchema(BaseModel):
    url: str = Field(description="Website URL")
    page_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for page scraping")
    extractor_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for data extraction")
    timeout: Optional[int] = Field(default=None, description="Timeout for the scraping operation")


class FirecrawlScrapeWebsiteTool(BaseTool):
    name: str = "Firecrawl web scrape tool"
    description: str = "Scrape webpages url using Firecrawl and return the contents"
    args_schema: Type[BaseModel] = FirecrawlScrapeWebsiteToolSchema
    api_key: Optional[str] = None
    firecrawl: Optional[Any] = None

    def __init__(self, api_key: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        try:
            from firecrawl import FirecrawlApp  # type: ignore
        except ImportError:
            raise ImportError(
                "`firecrawl` package not found, please run `pip install firecrawl-py`"
            )

        self.firecrawl = FirecrawlApp(api_key=api_key)

    def _run(self, url: str, page_options: Optional[Dict[str, Any]] = None, extractor_options: Optional[Dict[str, Any]] = None, timeout: Optional[int] = None):
        options = {
            "pageOptions": page_options,
            "extractorOptions": extractor_options,
            "timeout": timeout
        }
        return self.firecrawl.scrape_url(url, options)
35
src/crewai_tools/tools/firecrawl_search_tool/README.md
Normal file
@@ -0,0 +1,35 @@
# FirecrawlSearchTool

## Description

[Firecrawl](https://firecrawl.dev) is a platform for crawling and converting any website into clean markdown or structured data.

## Installation

- Get an API key from [firecrawl.dev](https://firecrawl.dev) and set it in environment variables (`FIRECRAWL_API_KEY`).
- Install the [Firecrawl SDK](https://github.com/mendableai/firecrawl) along with the `crewai[tools]` package:

```
pip install firecrawl-py 'crewai[tools]'
```

## Example

Utilize the FirecrawlSearchTool as follows to allow your agent to search the web:

```python
from crewai_tools import FirecrawlSearchTool

tool = FirecrawlSearchTool(query='what is firecrawl?')
```

## Arguments

- `api_key`: Optional. Specifies the Firecrawl API key. Default is the `FIRECRAWL_API_KEY` environment variable.
- `query`: The search query string to be used for searching.
- `page_options`: Optional. Options for result formatting.
  - `onlyMainContent`: Optional. Only return the main content of the page, excluding headers, navs, footers, etc.
  - `includeHtml`: Optional. Include the raw HTML content of the page. Will output an `html` key in the response.
  - `fetchPageContent`: Optional. Fetch the full content of the page.
- `search_options`: Optional. Options for controlling the crawling behavior.
  - `limit`: Optional. Maximum number of pages to crawl.
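And analogously for search; the query and option values are illustrative:

```python
from crewai_tools import FirecrawlSearchTool

tool = FirecrawlSearchTool(api_key="<FIRECRAWL_API_KEY>")
result = tool.run(query="what is firecrawl?", page_options={"fetchPageContent": True})
```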
@@ -0,0 +1,33 @@
from typing import Optional, Any, Type, Dict, List
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool


class FirecrawlSearchToolSchema(BaseModel):
    query: str = Field(description="Search query")
    page_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for result formatting")
    search_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for searching")


class FirecrawlSearchTool(BaseTool):
    name: str = "Firecrawl web search tool"
    description: str = "Search webpages using Firecrawl and return the results"
    args_schema: Type[BaseModel] = FirecrawlSearchToolSchema
    api_key: Optional[str] = None
    firecrawl: Optional[Any] = None

    def __init__(self, api_key: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        try:
            from firecrawl import FirecrawlApp  # type: ignore
        except ImportError:
            raise ImportError(
                "`firecrawl` package not found, please run `pip install firecrawl-py`"
            )

        self.firecrawl = FirecrawlApp(api_key=api_key)

    def _run(self, query: str, page_options: Optional[Dict[str, Any]] = None, result_options: Optional[Dict[str, Any]] = None):
        options = {
            "pageOptions": page_options,
            "resultOptions": result_options
        }
        return self.firecrawl.search(query, options)
@@ -68,4 +68,4 @@ class GithubSearchTool(RagTool):
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
        return super()._run(query=search_query, **kwargs)
@@ -57,4 +57,4 @@ class JSONSearchTool(RagTool):
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
        return super()._run(query=search_query, **kwargs)
53
src/crewai_tools/tools/llamaindex_tool/README.md
Normal file
@@ -0,0 +1,53 @@
# LlamaIndexTool Documentation

## Description
This tool is designed to be a general wrapper around LlamaIndex tools and query engines, enabling you to leverage LlamaIndex resources in RAG/agentic pipelines as tools that plug into CrewAI agents.

## Installation
To incorporate this tool into your project, follow the installation instructions below:
```shell
pip install 'crewai[tools]'
```

## Example
The following example demonstrates how to initialize the tool and execute a search with a given query:

```python
from crewai_tools import LlamaIndexTool

# Initialize the tool from a LlamaIndex Tool

## Example 1: Initialize from FunctionTool
from llama_index.core.tools import FunctionTool

your_python_function = lambda ...: ...
og_tool = FunctionTool.from_defaults(your_python_function, name="<name>", description='<description>')
tool = LlamaIndexTool.from_tool(og_tool)

## Example 2: Initialize from LlamaHub Tools
from llama_index.tools.wolfram_alpha import WolframAlphaToolSpec
wolfram_spec = WolframAlphaToolSpec(app_id="<app_id>")
wolfram_tools = wolfram_spec.to_tool_list()
tools = [LlamaIndexTool.from_tool(t) for t in wolfram_tools]


# Initialize Tool from a LlamaIndex Query Engine

## NOTE: LlamaIndex has a lot of query engines, define whatever query engine you want
query_engine = index.as_query_engine()
query_tool = LlamaIndexTool.from_query_engine(
    query_engine,
    name="Uber 2019 10K Query Tool",
    description="Use this tool to lookup the 2019 Uber 10K Annual Report"
)

```

## Steps to Get Started
To effectively use the `LlamaIndexTool`, follow these steps:

1. **Install CrewAI**: Confirm that the `crewai[tools]` package is installed in your Python environment.
2. **Install and use LlamaIndex**: Follow the LlamaIndex documentation (https://docs.llamaindex.ai/) to set up a RAG/agent pipeline.
84
src/crewai_tools/tools/llamaindex_tool/llamaindex_tool.py
Normal file
@@ -0,0 +1,84 @@
|
||||
import os
|
||||
import json
|
||||
import requests
|
||||
|
||||
from typing import Type, Any, cast, Optional
|
||||
from pydantic.v1 import BaseModel, Field
|
||||
from crewai_tools.tools.base_tool import BaseTool
|
||||
|
||||
class LlamaIndexTool(BaseTool):
|
||||
"""Tool to wrap LlamaIndex tools/query engines."""
|
||||
llama_index_tool: Any
|
||||
|
||||
def _run(
|
||||
self,
|
||||
*args: Any,
|
||||
**kwargs: Any,
|
||||
) -> Any:
|
||||
"""Run tool."""
|
||||
from llama_index.core.tools import BaseTool as LlamaBaseTool
|
||||
tool = cast(LlamaBaseTool, self.llama_index_tool)
|
||||
return tool(*args, **kwargs)
|
||||
|
||||
@classmethod
|
||||
def from_tool(
|
||||
cls,
|
||||
tool: Any,
|
||||
**kwargs: Any
|
||||
) -> "LlamaIndexTool":
|
||||
from llama_index.core.tools import BaseTool as LlamaBaseTool
|
||||
|
||||
if not isinstance(tool, LlamaBaseTool):
|
||||
raise ValueError(f"Expected a LlamaBaseTool, got {type(tool)}")
|
||||
tool = cast(LlamaBaseTool, tool)
|
||||
|
||||
if tool.metadata.fn_schema is None:
|
||||
raise ValueError("The LlamaIndex tool does not have an fn_schema specified.")
|
||||
args_schema = cast(Type[BaseModel], tool.metadata.fn_schema)
|
||||
|
||||
return cls(
|
||||
name=tool.metadata.name,
|
||||
description=tool.metadata.description,
|
||||
args_schema=args_schema,
|
||||
llama_index_tool=tool,
|
||||
**kwargs
|
||||
)
|
||||
|
||||
|
||||
@classmethod
|
||||
def from_query_engine(
|
||||
cls,
|
||||
query_engine: Any,
|
||||
name: Optional[str] = None,
|
||||
description: Optional[str] = None,
|
||||
return_direct: bool = False,
|
||||
**kwargs: Any
|
||||
) -> "LlamaIndexTool":
|
||||
from llama_index.core.query_engine import BaseQueryEngine
|
||||
from llama_index.core.tools import QueryEngineTool
|
||||
|
||||
if not isinstance(query_engine, BaseQueryEngine):
|
||||
raise ValueError(f"Expected a BaseQueryEngine, got {type(query_engine)}")
|
||||
|
||||
# NOTE: by default the schema expects an `input` variable. However this
|
||||
# confuses crewAI so we are renaming to `query`.
|
||||
class QueryToolSchema(BaseModel):
|
||||
"""Schema for query tool."""
|
||||
query: str = Field(..., description="Search query for the query tool.")
|
||||
|
||||
# NOTE: setting `resolve_input_errors` to True is important because the schema expects `input` but we are using `query`
|
||||
query_engine_tool = QueryEngineTool.from_defaults(
|
||||
query_engine,
|
||||
name=name,
|
||||
description=description,
|
||||
return_direct=return_direct,
|
||||
resolve_input_errors=True,
|
||||
)
|
||||
# HACK: we are replacing the schema with our custom schema
|
||||
query_engine_tool.metadata.fn_schema = QueryToolSchema
|
||||
|
||||
return cls.from_tool(
|
||||
query_engine_tool,
|
||||
**kwargs
|
||||
)
|
||||
|
||||
@@ -57,4 +57,4 @@ class MDXSearchTool(RagTool):
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
        return super()._run(query=search_query, **kwargs)
54
src/crewai_tools/tools/multion_tool/README.md
Normal file
@@ -0,0 +1,54 @@
# MultiOnTool Documentation

## Description
The MultiOnTool, integrated within the crewai_tools package, empowers CrewAI agents with the capability to navigate and interact with the web through natural language instructions. Leveraging the MultiOn API, this tool facilitates seamless web browsing, making it an essential asset for projects requiring dynamic web data interaction.

## Installation
Ensure the `crewai[tools]` package is installed in your environment to use the MultiOnTool. If it's not already installed, you can add it using the command below:
```shell
pip install 'crewai[tools]'
```

## Example
The following example demonstrates how to initialize the tool and execute a search with a given query:

```python
from crewai import Agent, Task, Crew
from crewai_tools import MultiOnTool

# Initialize the tool from a MultiOn Tool
multion_tool = MultiOnTool(api_key="YOUR_MULTION_API_KEY", local=False)

Browser = Agent(
    role="Browser Agent",
    goal="control web browsers using natural language ",
    backstory="An expert browsing agent.",
    tools=[multion_tool],
    verbose=True,
)

# example task to search and summarize news
browse = Task(
    description="Summarize the top 3 trending AI News headlines",
    expected_output="A summary of the top 3 trending AI News headlines",
    agent=Browser,
)

crew = Crew(agents=[Browser], tasks=[browse])

crew.kickoff()
```

## Arguments

- `api_key`: Specifies the MultiOn API key. Default is the `MULTION_API_KEY` environment variable.
- `local`: Set the local flag to "true" to run the agent locally on your browser. Make sure the MultiOn browser extension is installed and API Enabled is checked.
- `max_steps`: Optional. Sets the maximum number of steps the MultiOn agent can take for a command.

## Steps to Get Started
To effectively use the `MultiOnTool`, follow these steps:

1. **Install CrewAI**: Confirm that the `crewai[tools]` package is installed in your Python environment.
2. **Install and use MultiOn**: Follow the MultiOn documentation for installing the MultiOn Browser Extension (https://docs.multion.ai/learn/browser-extension).
3. **Enable API Usage**: Click on the MultiOn extension in the extensions folder of your browser (not the hovering MultiOn icon on the web page) to open the extension configurations. Click the API Enabled toggle to enable the API.
29
src/crewai_tools/tools/multion_tool/example.py
Normal file
@@ -0,0 +1,29 @@
import os

from crewai import Agent, Crew, Task
from multion_tool import MultiOnTool

os.environ["OPENAI_API_KEY"] = "Your Key"

multion_browse_tool = MultiOnTool(api_key="Your Key")

# Create a new agent
Browser = Agent(
    role="Browser Agent",
    goal="control web browsers using natural language ",
    backstory="An expert browsing agent.",
    tools=[multion_browse_tool],
    verbose=True,
)

# Define tasks
browse = Task(
    description="Summarize the top 3 trending AI News headlines",
    expected_output="A summary of the top 3 trending AI News headlines",
    agent=Browser,
)


crew = Crew(agents=[Browser], tasks=[browse])

crew.kickoff()
65
src/crewai_tools/tools/multion_tool/multion_tool.py
Normal file
@@ -0,0 +1,65 @@
"""Multion tool spec."""

from typing import Any, Optional

from crewai_tools.tools.base_tool import BaseTool


class MultiOnTool(BaseTool):
    """Tool to wrap MultiOn Browse Capabilities."""

    name: str = "Multion Browse Tool"
    description: str = """Multion gives the ability for LLMs to control web browsers using natural language instructions.
        If the status is 'CONTINUE', reissue the same instruction to continue execution
        """
    multion: Optional[Any] = None
    session_id: Optional[str] = None
    local: bool = False
    max_steps: int = 3

    def __init__(
        self,
        api_key: Optional[str] = None,
        local: bool = False,
        max_steps: int = 3,
        **kwargs,
    ):
        super().__init__(**kwargs)
        try:
            from multion.client import MultiOn  # type: ignore
        except ImportError:
            raise ImportError(
                "`multion` package not found, please run `pip install multion`"
            )
        self.session_id = None
        self.local = local
        self.multion = MultiOn(api_key=api_key)
        self.max_steps = max_steps

    def _run(
        self,
        cmd: str,
        *args: Any,
        **kwargs: Any,
    ) -> str:
        """
        Run the Multion client with the given command.

        Args:
            cmd (str): The detailed and specific natural language instruction for web browsing

            *args (Any): Additional arguments to pass to the Multion client
            **kwargs (Any): Additional keyword arguments to pass to the Multion client
        """

        browse = self.multion.browse(
            cmd=cmd,
            session_id=self.session_id,
            local=self.local,
            max_steps=self.max_steps,
            *args,
            **kwargs,
        )
        self.session_id = browse.session_id

        return browse.message + "\n\n STATUS: " + browse.status
74
src/crewai_tools/tools/nl2sql/README.md
Normal file
@@ -0,0 +1,74 @@
# NL2SQL Tool

## Description

This tool is used to convert natural language to SQL queries. When passed to the agent, it will generate queries and then use them to interact with the database.

This enables multiple workflows, like having an Agent access the database, fetch information based on its goal, and then use that information to generate a response, report, or any other output. It also provides the ability for the Agent to update the database based on its goal.

**Attention**: Make sure that the Agent has access to a Read-Replica or that it is okay for the Agent to run insert/update queries on the database.

## Requirements

- SqlAlchemy
- Any DB compatible library (e.g. psycopg2, mysql-connector-python)

## Installation
Install the crewai_tools package:
```shell
pip install 'crewai[tools]'
```

## Usage

In order to use the NL2SQLTool, you need to pass the database URI to the tool. The URI should be in the format `dialect+driver://username:password@host:port/database`.


```python
from crewai_tools import NL2SQLTool

# psycopg2 was installed to run this example with PostgreSQL
nl2sql = NL2SQLTool(db_uri="postgresql://example@localhost:5432/test_db")

@agent
def researcher(self) -> Agent:
    return Agent(
        config=self.agents_config["researcher"],
        allow_delegation=False,
        tools=[nl2sql]
    )
```

## Example

The primary task goal was:

"Retrieve the average, maximum, and minimum monthly revenue for each city, but only include cities that have more than one user. Also, count the number of users in each city and sort the results by the average monthly revenue in descending order"

The Agent first tried to get information from the DB; the first query was wrong, so the Agent tried again, got the correct information, and passed it to the next agent.




The second task goal was:

"Review the data and create a detailed report, and then create the table on the database with the fields based on the data provided.
Include information on the average, maximum, and minimum monthly revenue for each city, but only include cities that have more than one user. Also, count the number of users in each city and sort the results by the average monthly revenue in descending order."

Now things start to get interesting: the Agent generates the SQL query to not only create the table but also insert the data into it. And in the end, the Agent still returns the final report, which is exactly what was in the database.






This is a simple example of how the NL2SQLTool can be used to interact with the database and generate reports based on the data in the database.

The Tool provides endless possibilities on the logic of the Agent and how it can interact with the database.

```
DB -> Agent -> ... -> Agent -> DB
```
BIN src/crewai_tools/tools/nl2sql/images/image-2.png (new binary file, 83 KiB, not shown)
BIN src/crewai_tools/tools/nl2sql/images/image-3.png (new binary file, 82 KiB, not shown)
BIN src/crewai_tools/tools/nl2sql/images/image-4.png (new binary file, 82 KiB, not shown)
BIN src/crewai_tools/tools/nl2sql/images/image-5.png (new binary file, 65 KiB, not shown)
BIN src/crewai_tools/tools/nl2sql/images/image-7.png (new binary file, 24 KiB, not shown)
BIN src/crewai_tools/tools/nl2sql/images/image-9.png (new binary file, 55 KiB, not shown)
72
src/crewai_tools/tools/nl2sql/nl2sql_tool.py
Normal file
@@ -0,0 +1,72 @@
|
||||
from typing import Any, Union
|
||||
|
||||
from crewai_tools import BaseTool
|
||||
from pydantic import Field
|
||||
from sqlalchemy import create_engine, text
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
|
||||
class NL2SQLTool(BaseTool):
|
||||
name: str = "NL2SQLTool"
|
||||
description: str = "Converts natural language to SQL queries and executes them."
|
||||
db_uri: str = Field(
|
||||
title="Database URI",
|
||||
description="The URI of the database to connect to.",
|
||||
)
|
||||
tables: list = []
|
||||
columns: dict = {}
|
||||
|
||||
def model_post_init(self, __context: Any) -> None:
|
||||
data = {}
|
||||
tables = self._fetch_available_tables()
|
||||
|
||||
for table in tables:
|
||||
table_columns = self._fetch_all_available_columns(table["table_name"])
|
||||
data[f'{table["table_name"]}_columns'] = table_columns
|
||||
|
||||
self.tables = tables
|
||||
self.columns = data
|
||||
|
||||
def _fetch_available_tables(self):
|
||||
return self.execute_sql(
|
||||
"SELECT table_name FROM information_schema.tables WHERE table_schema = 'public';"
|
||||
)
|
||||
|
||||
def _fetch_all_available_columns(self, table_name: str):
|
||||
return self.execute_sql(
|
||||
f"SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '{table_name}';"
|
||||
)
|
||||
|
||||
def _run(self, sql_query: str):
|
||||
try:
|
||||
data = self.execute_sql(sql_query)
|
||||
except Exception as exc:
|
||||
data = (
|
||||
f"Based on these tables {self.tables} and columns {self.columns}, "
|
||||
"you can create SQL queries to retrieve data from the database."
|
||||
f"Get the original request {sql_query} and the error {exc} and create the correct SQL query."
|
||||
)
|
||||
|
||||
return data
|
||||
|
||||
def execute_sql(self, sql_query: str) -> Union[list, str]:
|
||||
engine = create_engine(self.db_uri)
|
||||
Session = sessionmaker(bind=engine)
|
||||
session = Session()
|
||||
try:
|
||||
result = session.execute(text(sql_query))
|
||||
session.commit()
|
||||
|
||||
if result.returns_rows:
|
||||
columns = result.keys()
|
||||
data = [dict(zip(columns, row)) for row in result.fetchall()]
|
||||
return data
|
||||
else:
|
||||
return f"Query {sql_query} executed successfully"
|
||||
|
||||
except Exception as e:
|
||||
session.rollback()
|
||||
raise e
|
||||
|
||||
finally:
|
||||
session.close()
|
||||
@@ -1,6 +1,7 @@
from typing import Any, Optional, Type

from embedchain.models.data_type import DataType
from pydantic import model_validator
from pydantic.v1 import BaseModel, Field

from ..rag.rag_tool import RagTool
@@ -35,6 +36,22 @@ class PDFSearchTool(RagTool):
        self.args_schema = FixedPDFSearchToolSchema
        self._generate_description()

    @model_validator(mode="after")
    def _set_default_adapter(self):
        if isinstance(self.adapter, RagTool._AdapterPlaceholder):
            from embedchain import App

            from crewai_tools.adapters.pdf_embedchain_adapter import (
                PDFEmbedchainAdapter,
            )

            app = App.from_config(config=self.config) if self.config else App()
            self.adapter = PDFEmbedchainAdapter(
                embedchain_app=app, summarize=self.summarize
            )

        return self

    def add(
        self,
        *args: Any,
@@ -0,0 +1,66 @@
from typing import Any, Optional, Type
from pathlib import Path

from pydantic import BaseModel, Field
from pypdf import PdfReader, PdfWriter, PageObject, ContentStream, NameObject, Font

from crewai_tools.tools.rag.rag_tool import RagTool


class PDFTextWritingToolSchema(BaseModel):
    """Input schema for PDFTextWritingTool."""

    pdf_path: str = Field(..., description="Path to the PDF file to modify")
    text: str = Field(..., description="Text to add to the PDF")
    position: tuple = Field(..., description="Tuple of (x, y) coordinates for text placement")
    font_size: int = Field(default=12, description="Font size of the text")
    font_color: str = Field(default="0 0 0 rg", description="RGB color code for the text")
    font_name: Optional[str] = Field(default="F1", description="Font name for standard fonts")
    font_file: Optional[str] = Field(None, description="Path to a .ttf font file for custom font usage")
    page_number: int = Field(default=0, description="Page number to add text to")


class PDFTextWritingTool(RagTool):
    """A tool to add text to specific positions in a PDF, with custom font support."""

    name: str = "PDF Text Writing Tool"
    description: str = "A tool that can write text to a specific position in a PDF document, with optional custom font embedding."
    args_schema: Type[BaseModel] = PDFTextWritingToolSchema

    def run(self, pdf_path: str, text: str, position: tuple, font_size: int, font_color: str,
            font_name: str = "F1", font_file: Optional[str] = None, page_number: int = 0, **kwargs) -> str:
        reader = PdfReader(pdf_path)
        writer = PdfWriter()

        if page_number >= len(reader.pages):
            return "Page number out of range."

        page: PageObject = reader.pages[page_number]
        content = ContentStream(page["/Contents"].data, reader)

        if font_file:
            # Check if the font file exists
            if not Path(font_file).exists():
                return "Font file does not exist."

            # Embed the custom font
            font_name = self.embed_font(writer, font_file)

        # Prepare text operation with the custom or standard font
        x_position, y_position = position
        text_operation = f"BT /{font_name} {font_size} Tf {x_position} {y_position} Td ({text}) Tj ET"
        content.operations.append([font_color])  # Set color
        content.operations.append([text_operation])  # Add text

        # Replace old content with new content
        page[NameObject("/Contents")] = content
        writer.add_page(page)

        # Save the new PDF
        output_pdf_path = "modified_output.pdf"
        with open(output_pdf_path, "wb") as out_file:
            writer.write(out_file)

        return f"Text added to {output_pdf_path} successfully."

    def embed_font(self, writer: PdfWriter, font_file: str) -> str:
        """Embeds a TTF font into the PDF and returns the font name."""
        with open(font_file, "rb") as file:
            font = Font.true_type(file.read())
        font_ref = writer.add_object(font)
        return font_ref
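A hedged usage sketch for the tool above; the module path, file name, and coordinates below are illustrative assumptions only, not part of the diff:

```python
# Hypothetical module path; adjust to wherever this file lands in the package.
from crewai_tools.tools.pdf_text_writing_tool.pdf_text_writing_tool import PDFTextWritingTool

writer_tool = PDFTextWritingTool()

# "report.pdf" is a placeholder; position is in PDF user-space units from the bottom-left corner.
result = writer_tool.run(
    pdf_path="report.pdf",
    text="Reviewed by the data team",
    position=(72, 72),
    font_size=10,
    font_color="0 0 0 rg",
    page_number=0,
)
print(result)  # e.g. "Text added to modified_output.pdf successfully."
```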
@@ -41,4 +41,4 @@ class PGSearchTool(RagTool):
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
        return super()._run(query=search_query, **kwargs)

@@ -25,8 +25,7 @@ class ScrapeWebsiteTool(BaseTool):
        'Accept-Language': 'en-US,en;q=0.9',
        'Referer': 'https://www.google.com/',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Accept-Encoding': 'gzip, deflate, br'
        'Upgrade-Insecure-Requests': '1'
    }

    def __init__(self, website_url: Optional[str] = None, cookies: Optional[dict] = None, **kwargs):
@@ -40,8 +39,8 @@ class ScrapeWebsiteTool(BaseTool):
            self.cookies = {cookies["name"]: os.getenv(cookies["value"])}

    def _run(
        self,
        **kwargs: Any,
        self,
        **kwargs: Any,
    ) -> Any:
        website_url = kwargs.get('website_url', self.website_url)
        page = requests.get(
@@ -50,9 +49,11 @@ class ScrapeWebsiteTool(BaseTool):
            headers=self.headers,
            cookies=self.cookies if self.cookies else {}
        )
        parsed = BeautifulSoup(page.content, "html.parser")

        page.encoding = page.apparent_encoding
        parsed = BeautifulSoup(page.text, "html.parser")

        text = parsed.get_text()
        text = '\n'.join([i for i in text.split('\n') if i.strip() != ''])
        text = ' '.join([i for i in text.split(' ') if i.strip() != ''])
        return text

@@ -0,0 +1,57 @@
# ScrapflyScrapeWebsiteTool

## Description
[ScrapFly](https://scrapfly.io/) is a web scraping API with headless browser capabilities, proxies, and anti-bot bypass. It allows for extracting web page data into accessible LLM markdown or text.

## Setup and Installation
1. **Install ScrapFly Python SDK**: Install the `scrapfly-sdk` Python package to use the ScrapFly Web Loader. Install it via pip with the following command:

```bash
pip install scrapfly-sdk
```

2. **API Key**: Register for free at [scrapfly.io/register](https://www.scrapfly.io/register/) to obtain your API key.

## Example Usage

Use the ScrapflyScrapeWebsiteTool as follows to retrieve web page data as text, markdown (LLM accessible), or HTML:

```python
from crewai_tools import ScrapflyScrapeWebsiteTool

tool = ScrapflyScrapeWebsiteTool(
    api_key="Your ScrapFly API key"
)

result = tool._run(
    url="https://web-scraping.dev/products",
    scrape_format="markdown",
    ignore_scrape_failures=True
)
```

## Additional Arguments
The ScrapflyScrapeWebsiteTool also allows passing a ScrapeConfig object for customizing the scrape request. See the [API params documentation](https://scrapfly.io/docs/scrape-api/getting-started) for the full feature details and their API params:
```python
from crewai_tools import ScrapflyScrapeWebsiteTool

tool = ScrapflyScrapeWebsiteTool(
    api_key="Your ScrapFly API key"
)

scrapfly_scrape_config = {
    "asp": True,  # Bypass scraping blocking and anti-bot solutions, like Cloudflare
    "render_js": True,  # Enable JavaScript rendering with a cloud headless browser
    "proxy_pool": "public_residential_pool",  # Select a proxy pool (datacenter or residential)
    "country": "us",  # Select a proxy location
    "auto_scroll": True,  # Auto scroll the page
    "js": ""  # Execute custom JavaScript code by the headless browser
}

result = tool._run(
    url="https://web-scraping.dev/products",
    scrape_format="markdown",
    ignore_scrape_failures=True,
    scrape_config=scrapfly_scrape_config
)
```
@@ -0,0 +1,47 @@
import logging

from typing import Optional, Any, Type, Dict, Literal
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool

logger = logging.getLogger(__file__)


class ScrapflyScrapeWebsiteToolSchema(BaseModel):
    url: str = Field(description="Webpage URL")
    scrape_format: Optional[Literal["raw", "markdown", "text"]] = Field(default="markdown", description="Webpage extraction format")
    scrape_config: Optional[Dict[str, Any]] = Field(default=None, description="Scrapfly request scrape config")
    ignore_scrape_failures: Optional[bool] = Field(default=None, description="whether to ignore failures")


class ScrapflyScrapeWebsiteTool(BaseTool):
    name: str = "Scrapfly web scraping API tool"
    description: str = "Scrape a webpage url using Scrapfly and return its content as markdown or text"
    args_schema: Type[BaseModel] = ScrapflyScrapeWebsiteToolSchema
    api_key: str = None
    scrapfly: Optional[Any] = None

    def __init__(self, api_key: str):
        super().__init__()
        try:
            from scrapfly import ScrapflyClient
        except ImportError:
            raise ImportError(
                "`scrapfly` package not found, please run `pip install scrapfly-sdk`"
            )
        self.scrapfly = ScrapflyClient(key=api_key)

    def _run(self, url: str, scrape_format: str = "markdown", scrape_config: Optional[Dict[str, Any]] = None, ignore_scrape_failures: Optional[bool] = None):
        from scrapfly import ScrapeApiResponse, ScrapeConfig

        scrape_config = scrape_config if scrape_config is not None else {}
        try:
            response: ScrapeApiResponse = self.scrapfly.scrape(
                ScrapeConfig(url, format=scrape_format, **scrape_config)
            )
            return response.scrape_result["content"]
        except Exception as e:
            if ignore_scrape_failures:
                logger.error(f"Error fetching data from {url}, exception: {e}")
                return None
            else:
                raise e

@@ -5,7 +5,7 @@ from pydantic.v1 import BaseModel, Field
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.chrome.options import Options

from ..base_tool import BaseTool

@@ -1,11 +1,20 @@
import datetime
import os
import json
import requests

from typing import Type, Any
from typing import Optional, Type, Any
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool

def _save_results_to_file(content: str) -> None:
    """Saves the search results to a file."""
    filename = f"search_results_{datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
    with open(filename, 'w') as file:
        file.write(content)
    print(f"Results saved to {filename}")


class SerperDevToolSchema(BaseModel):
    """Input for SerperDevTool."""
    search_query: str = Field(..., description="Mandatory search query you want to use to search the internet")
@@ -15,25 +24,42 @@ class SerperDevTool(BaseTool):
    description: str = "A tool that can be used to search the internet with a search_query."
    args_schema: Type[BaseModel] = SerperDevToolSchema
    search_url: str = "https://google.serper.dev/search"
    country: Optional[str] = ''
    location: Optional[str] = ''
    locale: Optional[str] = ''
    n_results: int = 10
    save_file: bool = False

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        search_query = kwargs.get('search_query')
        if search_query is None:
            search_query = kwargs.get('query')

        payload = json.dumps({"q": search_query})
        search_query = kwargs.get('search_query') or kwargs.get('query')
        save_file = kwargs.get('save_file', self.save_file)
        n_results = kwargs.get('n_results', self.n_results)

        payload = { "q": search_query, "num": n_results }

        if self.country != '':
            payload["gl"] = self.country
        if self.location != '':
            payload["location"] = self.location
        if self.locale != '':
            payload["hl"] = self.locale

        payload = json.dumps(payload)

        headers = {
            'X-API-KEY': os.environ['SERPER_API_KEY'],
            'content-type': 'application/json'
            'X-API-KEY': os.environ['SERPER_API_KEY'],
            'content-type': 'application/json'
        }

        response = requests.request("POST", self.search_url, headers=headers, data=payload)
        results = response.json()

        if 'organic' in results:
            results = results['organic']
            results = results['organic'][:self.n_results]
            string = []
            for result in results:
                try:
@@ -44,9 +70,11 @@ class SerperDevTool(BaseTool):
                        "---"
                    ]))
                except KeyError:
                    next
                    continue

            content = '\n'.join(string)
            if save_file:
                _save_results_to_file(content)
            return f"\nSearch results: {content}\n"
        else:
            return results

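A brief, illustrative sketch of how the options introduced in this hunk might be configured; the API key, location, and query values are placeholders, not part of the commit:

```python
import os
from crewai_tools import SerperDevTool

os.environ["SERPER_API_KEY"] = "your-serper-api-key"  # placeholder key

# country/location/locale map to Serper's `gl`, `location`, and `hl` params;
# n_results caps the organic results and save_file writes them to a local txt file.
tool = SerperDevTool(
    country="us",
    location="San Francisco, California",
    locale="en",
    n_results=5,
    save_file=True,
)
print(tool.run(search_query="latest open source LLM releases"))
```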
117
src/crewai_tools/tools/serply_api_tool/README.md
Normal file
@@ -0,0 +1,117 @@
# Serply API Documentation

## Description
This tool is designed to perform web, news, scholar, and job searches for a specified query across the internet. It utilizes the [Serply.io](https://serply.io) API to fetch and display the most relevant search results based on the query provided by the user.

## Installation

To incorporate this tool into your project, follow the installation instructions below:
```shell
pip install 'crewai[tools]'
```

## Examples

## Web Search
The following example demonstrates how to initialize the tool and execute a web search with a given query:

```python
from crewai_tools import SerplyWebSearchTool

# Initialize the tool for internet searching capabilities
tool = SerplyWebSearchTool()

# increase the search limit to 100 results
tool = SerplyWebSearchTool(limit=100)

# change the results language (fr - French)
tool = SerplyWebSearchTool(hl="fr")
```

## News Search
The following example demonstrates how to initialize the tool and execute a news search with a given query:

```python
from crewai_tools import SerplyNewsSearchTool

# Initialize the tool for internet searching capabilities
tool = SerplyNewsSearchTool()

# change the news country (JP - Japan)
tool = SerplyNewsSearchTool(proxy_location="JP")
```

## Scholar Search
The following example demonstrates how to initialize the tool and execute a scholarly literature search with a given query:

```python
from crewai_tools import SerplyScholarSearchTool

# Initialize the tool for internet searching capabilities
tool = SerplyScholarSearchTool()

# change the results country (GB - Great Britain)
tool = SerplyScholarSearchTool(proxy_location="GB")
```

## Job Search
The following example demonstrates how to initialize the tool and search for jobs in the USA:

```python
from crewai_tools import SerplyJobSearchTool

# Initialize the tool for internet searching capabilities
tool = SerplyJobSearchTool()
```


## Web Page To Markdown
The following example demonstrates how to initialize the tool, fetch a web page, and convert it to markdown:

```python
from crewai_tools import SerplyWebpageToMarkdownTool

# Initialize the tool for internet searching capabilities
tool = SerplyWebpageToMarkdownTool()

# change the country requests are made from (DE - Germany)
tool = SerplyWebpageToMarkdownTool(proxy_location="DE")
```

|
||||
|
||||
The following example demonstrates performing a Google search to find relevant articles. Then, convert those articles to markdown format for easier extraction of key points.
|
||||
|
||||
```python
|
||||
from crewai import Agent
|
||||
from crewai_tools import SerplyWebSearchTool, SerplyWebpageToMarkdownTool
|
||||
|
||||
search_tool = SerplyWebSearchTool()
|
||||
convert_to_markdown = SerplyWebpageToMarkdownTool()
|
||||
|
||||
# Creating a senior researcher agent with memory and verbose mode
|
||||
researcher = Agent(
|
||||
role='Senior Researcher',
|
||||
goal='Uncover groundbreaking technologies in {topic}',
|
||||
verbose=True,
|
||||
memory=True,
|
||||
backstory=(
|
||||
"Driven by curiosity, you're at the forefront of"
|
||||
"innovation, eager to explore and share knowledge that could change"
|
||||
"the world."
|
||||
),
|
||||
tools=[search_tool, convert_to_markdown],
|
||||
allow_delegation=True
|
||||
)
|
||||
```
|
||||
|
||||
## Steps to Get Started
|
||||
To effectively use the `SerplyApiTool`, follow these steps:
|
||||
|
||||
1. **Package Installation**: Confirm that the `crewai[tools]` package is installed in your Python environment.
|
||||
2. **API Key Acquisition**: Acquire a `serper.dev` API key by registering for a free account at [Serply.io](https://serply.io).
|
||||
3. **Environment Configuration**: Store your obtained API key in an environment variable named `SERPLY_API_KEY` to facilitate its use by the tool.
|
||||
|
||||
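A minimal sketch of step 3, assuming a POSIX shell; the key value is a placeholder:

```shell
# Placeholder key; replace with the API key from your Serply.io account
export SERPLY_API_KEY="your-serply-api-key"
```
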
## Conclusion
By integrating the Serply tools into Python projects, users gain the ability to conduct real-time web, news, scholar, and job searches directly from their applications. By adhering to the setup and usage guidelines provided, incorporating these tools into projects is streamlined and straightforward.
@@ -0,0 +1,75 @@
import os
import requests
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.rag.rag_tool import RagTool


class SerplyJobSearchToolSchema(BaseModel):
    """Input for Job Search."""
    search_query: str = Field(..., description="Mandatory search query you want to use to fetch job postings.")


class SerplyJobSearchTool(RagTool):
    name: str = "Job Search"
    description: str = "A tool to perform a job search in the US with a search_query."
    args_schema: Type[BaseModel] = SerplyJobSearchToolSchema
    request_url: str = "https://api.serply.io/v1/job/search/"
    proxy_location: Optional[str] = "US"
    """
    proxy_location: (str): Where to get jobs, specifically for a specific country's results.
        - Currently only supports US
    """
    headers: Optional[dict] = {}

    def __init__(
        self,
        **kwargs
    ):
        super().__init__(**kwargs)
        self.headers = {
            "X-API-KEY": os.environ["SERPLY_API_KEY"],
            "User-Agent": "crew-tools",
            "X-Proxy-Location": self.proxy_location
        }

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        query_payload = {}

        if "query" in kwargs:
            query_payload["q"] = kwargs["query"]
        elif "search_query" in kwargs:
            query_payload["q"] = kwargs["search_query"]

        # build the url
        url = f"{self.request_url}{urlencode(query_payload)}"

        response = requests.request("GET", url, headers=self.headers)

        jobs = response.json().get("jobs", "")

        if not jobs:
            return ""

        string = []
        for job in jobs:
            try:
                string.append('\n'.join([
                    f"Position: {job['position']}",
                    f"Employer: {job['employer']}",
                    f"Location: {job['location']}",
                    f"Link: {job['link']}",
                    f"""Highlights: {', '.join([h for h in job['highlights']])}""",
                    f"Is Remote: {job['is_remote']}",
                    f"Is Hybrid: {job['is_remote']}",
                    "---"
                ]))
            except KeyError:
                continue

        content = '\n'.join(string)
        return f"\nSearch results: {content}\n"
@@ -0,0 +1,81 @@
import os
import requests
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool


class SerplyNewsSearchToolSchema(BaseModel):
    """Input for Serply News Search."""
    search_query: str = Field(..., description="Mandatory search query you want to use to fetch news articles")


class SerplyNewsSearchTool(BaseTool):
    name: str = "News Search"
    description: str = "A tool to perform News article search with a search_query."
    args_schema: Type[BaseModel] = SerplyNewsSearchToolSchema
    search_url: str = "https://api.serply.io/v1/news/"
    proxy_location: Optional[str] = "US"
    headers: Optional[dict] = {}
    limit: Optional[int] = 10

    def __init__(
        self,
        limit: Optional[int] = 10,
        proxy_location: Optional[str] = "US",
        **kwargs
    ):
        """
        param: limit (int): The maximum number of results to return [10-100, defaults to 10]
        proxy_location: (str): Where to get news, specifically for a specific country results.
            ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
        """
        super().__init__(**kwargs)
        self.limit = limit
        self.proxy_location = proxy_location
        self.headers = {
            "X-API-KEY": os.environ["SERPLY_API_KEY"],
            "User-Agent": "crew-tools",
            "X-Proxy-Location": proxy_location
        }

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        # build query parameters
        query_payload = {}

        if "query" in kwargs:
            query_payload["q"] = kwargs["query"]
        elif "search_query" in kwargs:
            query_payload["q"] = kwargs["search_query"]

        # build the url
        url = f"{self.search_url}{urlencode(query_payload)}"

        response = requests.request("GET", url, headers=self.headers)
        results = response.json()
        if "entries" in results:
            results = results['entries']
            string = []
            for result in results[:self.limit]:
                try:
                    # follow url
                    r = requests.get(result['link'])
                    final_link = r.history[-1].headers['Location']
                    string.append('\n'.join([
                        f"Title: {result['title']}",
                        f"Link: {final_link}",
                        f"Source: {result['source']['title']}",
                        f"Published: {result['published']}",
                        "---"
                    ]))
                except KeyError:
                    continue

            content = '\n'.join(string)
            return f"\nSearch results: {content}\n"
        else:
            return results
@@ -0,0 +1,86 @@
import os
import requests
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool


class SerplyScholarSearchToolSchema(BaseModel):
    """Input for Serply Scholar Search."""
    search_query: str = Field(..., description="Mandatory search query you want to use to fetch scholarly literature")


class SerplyScholarSearchTool(BaseTool):
    name: str = "Scholar Search"
    description: str = "A tool to perform scholarly literature search with a search_query."
    args_schema: Type[BaseModel] = SerplyScholarSearchToolSchema
    search_url: str = "https://api.serply.io/v1/scholar/"
    hl: Optional[str] = "us"
    proxy_location: Optional[str] = "US"
    headers: Optional[dict] = {}

    def __init__(
        self,
        hl: str = "us",
        proxy_location: Optional[str] = "US",
        **kwargs
    ):
        """
        param: hl (str): host Language code to display results in
            (reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
        proxy_location: (str): Specify the proxy location for the search, specifically for a specific country results.
            ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
        """
        super().__init__(**kwargs)
        self.hl = hl
        self.proxy_location = proxy_location
        self.headers = {
            "X-API-KEY": os.environ["SERPLY_API_KEY"],
            "User-Agent": "crew-tools",
            "X-Proxy-Location": proxy_location
        }

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        query_payload = {
            "hl": self.hl
        }

        if "query" in kwargs:
            query_payload["q"] = kwargs["query"]
        elif "search_query" in kwargs:
            query_payload["q"] = kwargs["search_query"]

        # build the url
        url = f"{self.search_url}{urlencode(query_payload)}"

        response = requests.request("GET", url, headers=self.headers)
        articles = response.json().get("articles", "")

        if not articles:
            return ""

        string = []
        for article in articles:
            try:
                if "doc" in article:
                    link = article['doc']['link']
                else:
                    link = article['link']
                authors = [author['name'] for author in article['author']['authors']]
                string.append('\n'.join([
                    f"Title: {article['title']}",
                    f"Link: {link}",
                    f"Description: {article['description']}",
                    f"Cite: {article['cite']}",
                    f"Authors: {', '.join(authors)}",
                    "---"
                ]))
            except KeyError:
                continue

        content = '\n'.join(string)
        return f"\nSearch results: {content}\n"
@@ -0,0 +1,93 @@
import os
import requests
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool


class SerplyWebSearchToolSchema(BaseModel):
    """Input for Serply Web Search."""
    search_query: str = Field(..., description="Mandatory search query you want to use to Google search")


class SerplyWebSearchTool(BaseTool):
    name: str = "Google Search"
    description: str = "A tool to perform Google search with a search_query."
    args_schema: Type[BaseModel] = SerplyWebSearchToolSchema
    search_url: str = "https://api.serply.io/v1/search/"
    hl: Optional[str] = "us"
    limit: Optional[int] = 10
    device_type: Optional[str] = "desktop"
    proxy_location: Optional[str] = "US"
    query_payload: Optional[dict] = {}
    headers: Optional[dict] = {}

    def __init__(
        self,
        hl: str = "us",
        limit: int = 10,
        device_type: str = "desktop",
        proxy_location: str = "US",
        **kwargs
    ):
        """
        param: query (str): The query to search for
        param: hl (str): host Language code to display results in
            (reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
        param: limit (int): The maximum number of results to return [10-100, defaults to 10]
        param: device_type (str): desktop/mobile results (defaults to desktop)
        proxy_location: (str): Where to perform the search, specifically for local/regional results.
            ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
        """
        super().__init__(**kwargs)

        self.limit = limit
        self.device_type = device_type
        self.proxy_location = proxy_location

        # build query parameters
        self.query_payload = {
            "num": limit,
            "gl": proxy_location.upper(),
            "hl": hl.lower()
        }
        self.headers = {
            "X-API-KEY": os.environ["SERPLY_API_KEY"],
            "X-User-Agent": device_type,
            "User-Agent": "crew-tools",
            "X-Proxy-Location": proxy_location
        }

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        if "query" in kwargs:
            self.query_payload["q"] = kwargs["query"]
        elif "search_query" in kwargs:
            self.query_payload["q"] = kwargs["search_query"]

        # build the url
        url = f"{self.search_url}{urlencode(self.query_payload)}"

        response = requests.request("GET", url, headers=self.headers)
        results = response.json()
        if "results" in results:
            results = results['results']
            string = []
            for result in results:
                try:
                    string.append('\n'.join([
                        f"Title: {result['title']}",
                        f"Link: {result['link']}",
                        f"Description: {result['description'].strip()}",
                        "---"
                    ]))
                except KeyError:
                    continue

            content = '\n'.join(string)
            return f"\nSearch results: {content}\n"
        else:
            return results
@@ -0,0 +1,48 @@
import os
import requests
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.rag.rag_tool import RagTool


class SerplyWebpageToMarkdownToolSchema(BaseModel):
    """Input for Serply Search."""
    url: str = Field(..., description="Mandatory url you want to use to fetch and convert to markdown")


class SerplyWebpageToMarkdownTool(RagTool):
    name: str = "Webpage to Markdown"
    description: str = "A tool to convert a webpage to markdown to make it easier for LLMs to understand"
    args_schema: Type[BaseModel] = SerplyWebpageToMarkdownToolSchema
    request_url: str = "https://api.serply.io/v1/request"
    proxy_location: Optional[str] = "US"
    headers: Optional[dict] = {}

    def __init__(
        self,
        proxy_location: Optional[str] = "US",
        **kwargs
    ):
        """
        proxy_location: (str): Where to perform the request from, specifically for a specific country's results.
            ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
        """
        super().__init__(**kwargs)
        self.proxy_location = proxy_location
        self.headers = {
            "X-API-KEY": os.environ["SERPLY_API_KEY"],
            "User-Agent": "crew-tools",
            "X-Proxy-Location": proxy_location
        }

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        data = {
            "url": kwargs["url"],
            "method": "GET",
            "response_type": "markdown"
        }
        response = requests.request("POST", self.request_url, headers=self.headers, json=data)
        return response.text
81
src/crewai_tools/tools/spider_tool/README.md
Normal file
@@ -0,0 +1,81 @@
# SpiderTool

## Description

[Spider](https://spider.cloud/?ref=crewai) is the [fastest](https://github.com/spider-rs/spider/blob/main/benches/BENCHMARKS.md#benchmark-results) open source scraper and crawler that returns LLM-ready data. It converts any website into pure HTML, markdown, metadata or text while enabling you to crawl with custom actions using AI.

## Installation

To use the Spider API you need to install the [Spider SDK](https://pypi.org/project/spider-client/) and the `crewai[tools]` SDK:

```shell
pip install spider-client 'crewai[tools]'
```

## Example

This example shows how you can use the Spider tool to enable your agent to scrape and crawl websites. The data returned from the Spider API is already LLM-ready, so no need to do any cleaning there.

```python
from crewai import Agent, Crew, Task
from crewai_tools import SpiderTool

def main():
    spider_tool = SpiderTool()

    searcher = Agent(
        role="Web Research Expert",
        goal="Find related information from specific URL's",
        backstory="An expert web researcher that uses the web extremely well",
        tools=[spider_tool],
        verbose=True,
    )

    return_metadata = Task(
        description="Scrape https://spider.cloud with a limit of 1 and enable metadata",
        expected_output="Metadata and 10 word summary of spider.cloud",
        agent=searcher
    )

    crew = Crew(
        agents=[searcher],
        tasks=[
            return_metadata,
        ],
        verbose=2
    )

    crew.kickoff()

if __name__ == "__main__":
    main()
```

## Arguments

- `api_key` (string, optional): Specifies the Spider API key. If not specified, it looks for `SPIDER_API_KEY` in environment variables.
- `params` (object, optional): Optional parameters for the request (see the direct-call sketch after this list). Defaults to `{"return_format": "markdown"}` to return the website's content in a format that fits LLMs better.
- `request` (string): The request type to perform. Possible values are `http`, `chrome`, and `smart`. Use `smart` to perform an HTTP request by default until JavaScript rendering is needed for the HTML.
- `limit` (int): The maximum number of pages allowed to crawl per website. Remove the value or set it to `0` to crawl all pages.
- `depth` (int): The crawl limit for maximum depth. If `0`, no limit will be applied.
- `cache` (bool): Use HTTP caching for the crawl to speed up repeated runs. Default is `true`.
- `budget` (object): Object of paths with a counter for limiting the number of pages, for example `{"*":1}` to crawl only the root page.
- `locale` (string): The locale to use for the request, for example `en-US`.
- `cookies` (string): Add HTTP cookies to use for the request.
- `stealth` (bool): Use stealth mode for headless chrome requests to help prevent being blocked. The default is `true` on chrome.
- `headers` (object): Forward HTTP headers to use for all requests. The object is expected to be a map of key value pairs.
- `metadata` (bool): Boolean to store metadata about the pages and content found. This could help improve AI interop. Defaults to `false` unless you have the website already stored with the configuration enabled.
- `viewport` (object): Configure the viewport for chrome. Defaults to `800x600`.
- `encoding` (string): The type of encoding to use, like `UTF-8` or `SHIFT_JIS`.
- `subdomains` (bool): Allow subdomains to be included. Default is `false`.
- `user_agent` (string): Add a custom HTTP user agent to the request. By default this is set to a random agent.
- `store_data` (bool): Boolean to determine if storage should be used. If set, this takes precedence over `storageless`. Defaults to `false`.
- `gpt_config` (object): Use AI to generate actions to perform during the crawl. You can pass an array for the `"prompt"` to chain steps.
- `fingerprint` (bool): Use advanced fingerprinting for chrome.
- `storageless` (bool): Boolean to prevent storing any type of data for the request, including storage and AI vector embeddings. Defaults to `false` unless you have the website already stored.
- `readability` (bool): Use [readability](https://github.com/mozilla/readability) to pre-process the content for reading. This may drastically improve the content for LLM usage.
- `return_format` (string): The format to return the data in. Possible values are `markdown`, `raw`, `text`, and `html2text`. Use `raw` to return the default format of the page, like HTML.
- `proxy_enabled` (bool): Enable high performance premium proxies for the request to prevent being blocked at the network level.
- `query_selector` (string): The CSS query selector to use when extracting content from the markup.
- `full_resources` (bool): Crawl and download all the resources for a website.
- `request_timeout` (int): The timeout to use for the request. Timeouts can be from `5-60`. The default is `30` seconds.
- `run_in_background` (bool): Run the request in the background. Useful if storing data and wanting to trigger crawls to the dashboard. This has no effect if `storageless` is set.
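A minimal direct-call sketch using the arguments above; the URL and param values are illustrative placeholders only:

```python
from crewai_tools import SpiderTool

tool = SpiderTool()  # picks up SPIDER_API_KEY from the environment

# Scrape a single page; the tool forces `return_format: markdown`,
# the remaining keys follow the argument list above.
page = tool._run(
    url="https://spider.cloud",
    params={"limit": 1, "metadata": True},
    mode="scrape",
)
print(page)
```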
59
src/crewai_tools/tools/spider_tool/spider_tool.py
Normal file
@@ -0,0 +1,59 @@
from typing import Optional, Any, Type, Dict, Literal
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool


class SpiderToolSchema(BaseModel):
    url: str = Field(description="Website URL")
    params: Optional[Dict[str, Any]] = Field(
        description="Set additional params. Options include:\n"
        "- `limit`: Optional[int] - The maximum number of pages allowed to crawl per website. Remove the value or set it to `0` to crawl all pages.\n"
        "- `depth`: Optional[int] - The crawl limit for maximum depth. If `0`, no limit will be applied.\n"
        "- `metadata`: Optional[bool] - Boolean to include metadata or not. Defaults to `False` unless set to `True`. If the user wants metadata, include params.metadata = True.\n"
        "- `query_selector`: Optional[str] - The CSS query selector to use when extracting content from the markup.\n"
    )
    mode: Literal["scrape", "crawl"] = Field(
        default="scrape",
        description="Mode, the only two allowed modes are `scrape` or `crawl`. Use `scrape` to scrape a single page and `crawl` to crawl the entire website following subpages. These modes are the only allowed values even when ANY params is set."
    )


class SpiderTool(BaseTool):
    name: str = "Spider scrape & crawl tool"
    description: str = "Scrape & Crawl any url and return LLM-ready data."
    args_schema: Type[BaseModel] = SpiderToolSchema
    api_key: Optional[str] = None
    spider: Optional[Any] = None

    def __init__(self, api_key: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        try:
            from spider import Spider  # type: ignore
        except ImportError:
            raise ImportError(
                "`spider-client` package not found, please run `pip install spider-client`"
            )

        self.spider = Spider(api_key=api_key)

    def _run(
        self,
        url: str,
        params: Optional[Dict[str, Any]] = None,
        mode: Optional[Literal["scrape", "crawl"]] = "scrape"
    ):
        if mode not in ["scrape", "crawl"]:
            raise ValueError(
                "Unknown mode in `mode` parameter, `scrape` or `crawl` are the allowed modes"
            )

        # Ensure 'return_format': 'markdown' is always included
        if params:
            params["return_format"] = "markdown"
        else:
            params = {"return_format": "markdown"}

        action = (
            self.spider.scrape_url if mode == "scrape" else self.spider.crawl_url
        )
        spider_docs = action(url=url, params=params)

        return spider_docs
@@ -57,4 +57,4 @@ class TXTSearchTool(RagTool):
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
        return super()._run(query=search_query, **kwargs)

30
src/crewai_tools/tools/vision_tool/README.md
Normal file
@@ -0,0 +1,30 @@
# Vision Tool

## Description

This tool is used to extract text from images. When passed to the agent, it will extract the text from the image and then use it to generate a response, a report, or any other output. The URL or the path of the image should be passed to the agent.


## Installation
Install the crewai_tools package
```shell
pip install 'crewai[tools]'
```

## Usage

In order to use the VisionTool, the OpenAI API key should be set in the environment variable `OPENAI_API_KEY`.

```python
from crewai_tools import VisionTool

vision_tool = VisionTool()

@agent
def researcher(self) -> Agent:
    return Agent(
        config=self.agents_config["researcher"],
        allow_delegation=False,
        tools=[vision_tool]
    )
```
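For a direct call outside an agent, a minimal sketch; the image URL below is a placeholder assumption:

```python
from crewai_tools import VisionTool

vision_tool = VisionTool()

# Accepts either a hosted image URL or a local file path.
description = vision_tool._run(image_path_url="https://example.com/receipt.jpg")
print(description)
```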
93
src/crewai_tools/tools/vision_tool/vision_tool.py
Normal file
@@ -0,0 +1,93 @@
import base64
from typing import Type

import requests
from crewai_tools.tools.base_tool import BaseTool
from openai import OpenAI
from pydantic.v1 import BaseModel, Field


class ImagePromptSchema(BaseModel):
    """Input for Vision Tool."""

    image_path_url: str = Field(..., description="The image path or URL.")


class VisionTool(BaseTool):
    name: str = "Vision Tool"
    description: str = (
        "This tool uses OpenAI's Vision API to describe the contents of an image."
    )
    args_schema: Type[BaseModel] = ImagePromptSchema

    def _run_web_hosted_images(self, client, image_path_url: str) -> str:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "What's in this image?"},
                        {
                            "type": "image_url",
                            "image_url": {"url": image_path_url},
                        },
                    ],
                }
            ],
            max_tokens=300,
        )

        return response.choices[0].message.content

    def _run_local_images(self, client, image_path_url: str) -> str:
        base64_image = self._encode_image(image_path_url)

        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {client.api_key}",
        }

        payload = {
            "model": "gpt-4o-mini",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "What's in this image?"},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{base64_image}"
                            },
                        },
                    ],
                }
            ],
            "max_tokens": 300,
        }

        response = requests.post(
            "https://api.openai.com/v1/chat/completions", headers=headers, json=payload
        )

        return response.json()["choices"][0]["message"]["content"]

    def _run(self, **kwargs) -> str:
        client = OpenAI()

        image_path_url = kwargs.get("image_path_url")

        if not image_path_url:
            return "Image Path or URL is required."

        if "http" in image_path_url:
            image_description = self._run_web_hosted_images(client, image_path_url)
        else:
            image_description = self._run_local_images(client, image_path_url)

        return image_description

    def _encode_image(self, image_path: str):
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")
@@ -57,4 +57,4 @@ class WebsiteSearchTool(RagTool):
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
        return super()._run(query=search_query, **kwargs)

@@ -57,4 +57,4 @@ class XMLSearchTool(RagTool):
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
        return super()._run(query=search_query, **kwargs)

@@ -60,4 +60,4 @@ class YoutubeChannelSearchTool(RagTool):
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
        return super()._run(query=search_query, **kwargs)

@@ -57,4 +57,4 @@ class YoutubeVideoSearchTool(RagTool):
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
        return super()._run(query=search_query, **kwargs)

47
tests/spider_tool_test.py
Normal file
@@ -0,0 +1,47 @@
from crewai_tools.tools.spider_tool.spider_tool import SpiderTool
from crewai import Agent, Task, Crew

def test_spider_tool():
    spider_tool = SpiderTool()

    searcher = Agent(
        role="Web Research Expert",
        goal="Find related information from specific URL's",
        backstory="An expert web researcher that uses the web extremely well",
        tools=[spider_tool],
        verbose=True,
        cache=False
    )

    choose_between_scrape_crawl = Task(
        description="Scrape the page of spider.cloud and return a summary of how fast it is",
        expected_output="spider.cloud is a fast scraping and crawling tool",
        agent=searcher
    )

    return_metadata = Task(
        description="Scrape https://spider.cloud with a limit of 1 and enable metadata",
        expected_output="Metadata and 10 word summary of spider.cloud",
        agent=searcher
    )

    css_selector = Task(
        description="Scrape one page of spider.cloud with the `body > div > main > section.grid.md\:grid-cols-2.gap-10.place-items-center.md\:max-w-screen-xl.mx-auto.pb-8.pt-20 > div:nth-child(1) > h1` CSS selector",
        expected_output="The content of the element with the css selector body > div > main > section.grid.md\:grid-cols-2.gap-10.place-items-center.md\:max-w-screen-xl.mx-auto.pb-8.pt-20 > div:nth-child(1) > h1",
        agent=searcher
    )

    crew = Crew(
        agents=[searcher],
        tasks=[
            choose_between_scrape_crawl,
            return_metadata,
            css_selector
        ],
        verbose=2
    )

    crew.kickoff()

if __name__ == "__main__":
    test_spider_tool()
38
tests/tools/test_code_interpreter_tool.py
Normal file
@@ -0,0 +1,38 @@
import unittest
from unittest.mock import patch

from crewai_tools.tools.code_interpreter_tool.code_interpreter_tool import (
    CodeInterpreterTool,
)


class TestCodeInterpreterTool(unittest.TestCase):
    @patch("crewai_tools.tools.code_interpreter_tool.code_interpreter_tool.docker")
    def test_run_code_in_docker(self, docker_mock):
        tool = CodeInterpreterTool()
        code = "print('Hello, World!')"
        libraries_used = "numpy,pandas"
        expected_output = "Hello, World!\n"

        docker_mock.from_env().containers.run().exec_run().exit_code = 0
        docker_mock.from_env().containers.run().exec_run().output = (
            expected_output.encode()
        )
        result = tool.run_code_in_docker(code, libraries_used)

        self.assertEqual(result, expected_output)

    @patch("crewai_tools.tools.code_interpreter_tool.code_interpreter_tool.docker")
    def test_run_code_in_docker_with_error(self, docker_mock):
        tool = CodeInterpreterTool()
        code = "print(1/0)"
        libraries_used = "numpy,pandas"
        expected_output = "Something went wrong while running the code: \nZeroDivisionError: division by zero\n"

        docker_mock.from_env().containers.run().exec_run().exit_code = 1
        docker_mock.from_env().containers.run().exec_run().output = (
            b"ZeroDivisionError: division by zero\n"
        )
        result = tool.run_code_in_docker(code, libraries_used)

        self.assertEqual(result, expected_output)