Mirror of https://github.com/crewAIInc/crewAI.git (synced 2026-01-08 15:48:29 +00:00)
Merge branch 'main' into add-more-parameters-to-serperdev-search-payload
@@ -1,26 +1,39 @@
from .tools.base_tool import BaseTool, Tool, tool
from .tools import (
    BrowserbaseLoadTool,
    CodeDocsSearchTool,
    CSVSearchTool,
    DirectorySearchTool,
    DOCXSearchTool,
    DirectoryReadTool,
    EXASearchTool,
    FileReadTool,
    GithubSearchTool,
    SerperDevTool,
    TXTSearchTool,
    JSONSearchTool,
    MDXSearchTool,
    PDFSearchTool,
    PGSearchTool,
    RagTool,
    ScrapeElementFromWebsiteTool,
    ScrapeWebsiteTool,
    SeleniumScrapingTool,
    WebsiteSearchTool,
    XMLSearchTool,
    YoutubeChannelSearchTool,
    YoutubeVideoSearchTool,
)
    BrowserbaseLoadTool,
    CodeDocsSearchTool,
    CodeInterpreterTool,
    ComposioTool,
    CSVSearchTool,
    DirectoryReadTool,
    DirectorySearchTool,
    DOCXSearchTool,
    EXASearchTool,
    FileReadTool,
    FirecrawlCrawlWebsiteTool,
    FirecrawlScrapeWebsiteTool,
    FirecrawlSearchTool,
    GithubSearchTool,
    JSONSearchTool,
    LlamaIndexTool,
    MDXSearchTool,
    MultiOnTool,
    PDFSearchTool,
    PGSearchTool,
    RagTool,
    ScrapeElementFromWebsiteTool,
    ScrapflyScrapeWebsiteTool,
    ScrapeWebsiteTool,
    SeleniumScrapingTool,
    SerperDevTool,
    SerplyWebSearchTool,
    SerplyNewsSearchTool,
    SerplyScholarSearchTool,
    SerplyWebpageToMarkdownTool,
    SerplyJobSearchTool,
    TXTSearchTool,
    WebsiteSearchTool,
    XMLSearchTool,
    YoutubeChannelSearchTool,
    YoutubeVideoSearchTool
)
from .tools.base_tool import BaseTool, Tool, tool
32  src/crewai_tools/adapters/pdf_embedchain_adapter.py  Normal file
@@ -0,0 +1,32 @@
from typing import Any, Optional

from embedchain import App

from crewai_tools.tools.rag.rag_tool import Adapter


class PDFEmbedchainAdapter(Adapter):
    embedchain_app: App
    summarize: bool = False
    src: Optional[str] = None

    def query(self, question: str) -> str:
        where = (
            {"app_id": self.embedchain_app.config.id, "source": self.src}
            if self.src
            else None
        )
        result, sources = self.embedchain_app.query(
            question, citations=True, dry_run=(not self.summarize), where=where
        )
        if self.summarize:
            return result
        return "\n\n".join([source[0] for source in sources])

    def add(
        self,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        self.src = args[0] if args else None
        self.embedchain_app.add(*args, **kwargs)
@@ -1,23 +1,40 @@
from .browserbase_load_tool.browserbase_load_tool import BrowserbaseLoadTool
from .code_docs_search_tool.code_docs_search_tool import CodeDocsSearchTool
from .code_interpreter_tool.code_interpreter_tool import CodeInterpreterTool
from .composio_tool.composio_tool import ComposioTool
from .csv_search_tool.csv_search_tool import CSVSearchTool
from .directory_search_tool.directory_search_tool import DirectorySearchTool
from .directory_read_tool.directory_read_tool import DirectoryReadTool
from .directory_search_tool.directory_search_tool import DirectorySearchTool
from .docx_search_tool.docx_search_tool import DOCXSearchTool
from .exa_tools.exa_search_tool import EXASearchTool
from .file_read_tool.file_read_tool import FileReadTool
from .firecrawl_crawl_website_tool.firecrawl_crawl_website_tool import FirecrawlCrawlWebsiteTool
from .firecrawl_scrape_website_tool.firecrawl_scrape_website_tool import FirecrawlScrapeWebsiteTool
from .firecrawl_search_tool.firecrawl_search_tool import FirecrawlSearchTool
from .github_search_tool.github_search_tool import GithubSearchTool
from .serper_dev_tool.serper_dev_tool import SerperDevTool
from .txt_search_tool.txt_search_tool import TXTSearchTool
from .json_search_tool.json_search_tool import JSONSearchTool
from .llamaindex_tool.llamaindex_tool import LlamaIndexTool
from .mdx_seach_tool.mdx_search_tool import MDXSearchTool
from .multion_tool.multion_tool import MultiOnTool
from .pdf_search_tool.pdf_search_tool import PDFSearchTool
from .pg_seach_tool.pg_search_tool import PGSearchTool
from .rag.rag_tool import RagTool
from .scrape_element_from_website.scrape_element_from_website import ScrapeElementFromWebsiteTool
from .scrape_element_from_website.scrape_element_from_website import (
    ScrapeElementFromWebsiteTool,
)
from .scrape_website_tool.scrape_website_tool import ScrapeWebsiteTool
from .scrapfly_scrape_website_tool.scrapfly_scrape_website_tool import ScrapflyScrapeWebsiteTool
from .selenium_scraping_tool.selenium_scraping_tool import SeleniumScrapingTool
from .serper_dev_tool.serper_dev_tool import SerperDevTool
from .serply_api_tool.serply_web_search_tool import SerplyWebSearchTool
from .serply_api_tool.serply_news_search_tool import SerplyNewsSearchTool
from .serply_api_tool.serply_scholar_search_tool import SerplyScholarSearchTool
from .serply_api_tool.serply_webpage_to_markdown_tool import SerplyWebpageToMarkdownTool
from .serply_api_tool.serply_job_search_tool import SerplyJobSearchTool
from .txt_search_tool.txt_search_tool import TXTSearchTool
from .website_search.website_search_tool import WebsiteSearchTool
from .xml_search_tool.xml_search_tool import XMLSearchTool
from .youtube_channel_search_tool.youtube_channel_search_tool import YoutubeChannelSearchTool
from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool
from .youtube_channel_search_tool.youtube_channel_search_tool import (
    YoutubeChannelSearchTool,
)
from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool
@@ -22,6 +22,8 @@ class BaseTool(BaseModel, ABC):
    """Flag to check if the description has been updated."""
    cache_function: Optional[Callable] = lambda _args, _result: True
    """Function that will be used to determine if the tool should be cached, should return a boolean. If None, the tool will be cached."""
    result_as_answer: bool = False
    """Flag to check if the tool should be the final agent answer."""

    @validator("args_schema", always=True, pre=True)
    def _default_args_schema(cls, v: Type[V1BaseModel]) -> Type[V1BaseModel]:
@@ -85,12 +87,15 @@ class BaseTool(BaseModel, ABC):

    def _generate_description(self):
        args = []
        args_description = []
        for arg, attribute in self.args_schema.schema()["properties"].items():
            if "type" in attribute:
                args.append(f"{arg}: '{attribute['type']}'")
            if "description" in attribute:
                args_description.append(f"{arg}: '{attribute['description']}'")

        description = self.description.replace("\n", " ")
        self.description = f"{self.name}({', '.join(args)}) - {description}"
        self.description = f"{self.name}({', '.join(args)}) - {description} {', '.join(args_description)}"


class Tool(BaseTool):
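To make the effect of the `_generate_description` change concrete, here is a hedged sketch. The `GreetTool` and its schema are invented for illustration (they are not part of this commit); with the extra `args_description` join, per-argument descriptions are now appended to the generated tool description:

```python
from typing import Type

from pydantic.v1 import BaseModel, Field

from crewai_tools import BaseTool


class GreetSchema(BaseModel):
    name: str = Field(..., description="Name of the person to greet")


class GreetTool(BaseTool):
    name: str = "Greeter"
    description: str = "Greets a person by name."
    args_schema: Type[BaseModel] = GreetSchema

    def _run(self, name: str) -> str:
        return f"Hello, {name}!"


tool = GreetTool()
tool._generate_description()  # called explicitly here for the demo; tool setup normally does this
# With the change above, the description now ends with the argument descriptions, roughly:
#   Greeter(name: 'string') - Greets a person by name. name: 'Name of the person to greet'
print(tool.description)
```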
@@ -57,4 +57,4 @@ class CodeDocsSearchTool(RagTool):
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
        return super()._run(query=search_query, **kwargs)
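This one-line change, repeated for several RAG-based tools later in the diff, forwards any extra keyword arguments on to `RagTool._run` instead of silently dropping them. A hedged sketch of a direct call; the docs URL is illustrative and the `docs_url` constructor argument is assumed from the tool's usual signature, not shown in this hunk:

```python
from crewai_tools import CodeDocsSearchTool

# Hypothetical documentation site; any extra kwargs passed at call time are
# now forwarded to RagTool._run together with the query.
tool = CodeDocsSearchTool(docs_url="https://docs.example.com/reference")
print(tool.run(search_query="How do I authenticate requests?"))
```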
14  src/crewai_tools/tools/code_interpreter_tool/Dockerfile  Normal file
@@ -0,0 +1,14 @@
FROM python:3.11-slim

# Install common utilities
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    wget \
    software-properties-common

# Clean up
RUN apt-get clean && rm -rf /var/lib/apt/lists/*

# Set the working directory
WORKDIR /workspace
29  src/crewai_tools/tools/code_interpreter_tool/README.md  Normal file
@@ -0,0 +1,29 @@
# CodeInterpreterTool

## Description
This tool gives the Agent the ability to run Python 3 code that the Agent itself has generated. The code is executed in a sandboxed environment, so it is safe to run any code.

It is incredibly useful since it allows the Agent to generate code, run it in the same environment, get the result, and use it to make decisions.

## Requirements

- Docker

## Installation
Install the crewai_tools package

```shell
pip install 'crewai[tools]'
```

## Example

Remember that when using this tool, the code must be generated by the Agent itself and must be Python 3 code. The first run takes some extra time because the Docker image needs to be built.

```python
from crewai_tools import CodeInterpreterTool

Agent(
    ...
    tools=[CodeInterpreterTool()],
)
```
@@ -0,0 +1,94 @@
import importlib.util
import os
from typing import List, Optional, Type

import docker
from crewai_tools.tools.base_tool import BaseTool
from pydantic.v1 import BaseModel, Field


class CodeInterpreterSchema(BaseModel):
    """Input for CodeInterpreterTool."""

    code: str = Field(
        ...,
        description="Python3 code used to be interpreted in the Docker container. ALWAYS PRINT the final result and the output of the code",
    )

    libraries_used: List[str] = Field(
        ...,
        description="List of libraries used in the code with proper installing names separated by commas. Example: numpy,pandas,beautifulsoup4",
    )


class CodeInterpreterTool(BaseTool):
    name: str = "Code Interpreter"
    description: str = "Interprets Python3 code strings with a final print statement."
    args_schema: Type[BaseModel] = CodeInterpreterSchema
    code: Optional[str] = None

    @staticmethod
    def _get_installed_package_path():
        spec = importlib.util.find_spec("crewai_tools")
        return os.path.dirname(spec.origin)

    def _verify_docker_image(self) -> None:
        """
        Verify if the Docker image is available
        """
        image_tag = "code-interpreter:latest"
        client = docker.from_env()

        try:
            client.images.get(image_tag)

        except docker.errors.ImageNotFound:
            package_path = self._get_installed_package_path()
            dockerfile_path = os.path.join(package_path, "tools/code_interpreter_tool")
            if not os.path.exists(dockerfile_path):
                raise FileNotFoundError(f"Dockerfile not found in {dockerfile_path}")

            client.images.build(
                path=dockerfile_path,
                tag=image_tag,
                rm=True,
            )

    def _run(self, **kwargs) -> str:
        code = kwargs.get("code", self.code)
        libraries_used = kwargs.get("libraries_used", [])
        return self.run_code_in_docker(code, libraries_used)

    def _install_libraries(
        self, container: docker.models.containers.Container, libraries: List[str]
    ) -> None:
        """
        Install missing libraries in the Docker container
        """
        for library in libraries:
            container.exec_run(f"pip install {library}")

    def _init_docker_container(self) -> docker.models.containers.Container:
        client = docker.from_env()
        return client.containers.run(
            "code-interpreter",
            detach=True,
            tty=True,
            working_dir="/workspace",
            name="code-interpreter",
        )

    def run_code_in_docker(self, code: str, libraries_used: List[str]) -> str:
        self._verify_docker_image()
        container = self._init_docker_container()
        self._install_libraries(container, libraries_used)

        cmd_to_run = f'python3 -c "{code}"'
        exec_result = container.exec_run(cmd_to_run)

        container.stop()
        container.remove()

        if exec_result.exit_code != 0:
            return f"Something went wrong while running the code: \n{exec_result.output.decode('utf-8')}"
        return exec_result.output.decode("utf-8")
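For reference, a minimal sketch of exercising the interpreter outside of an Agent, assuming a local Docker daemon is available (the snippet being executed is illustrative):

```python
from crewai_tools import CodeInterpreterTool

tool = CodeInterpreterTool()

# Builds the code-interpreter image on first use, starts a container,
# installs any requested libraries, runs the code, and returns its stdout.
output = tool.run_code_in_docker(
    code="import math; print(math.sqrt(2))",
    libraries_used=[],
)
print(output)
```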
72  src/crewai_tools/tools/composio_tool/README.md  Normal file
@@ -0,0 +1,72 @@
# ComposioTool Documentation

## Description

This tool is a wrapper around the Composio toolset and gives your agent access to a wide variety of tools from the Composio SDK.

## Installation

To incorporate this tool into your project, follow the installation instructions below:

```shell
pip install composio-core
pip install 'crewai[tools]'
```

After the installation is complete, either run `composio login` or export your Composio API key as `COMPOSIO_API_KEY`.

## Example

The following example demonstrates how to initialize the tool and execute a GitHub action:

1. Initialize toolset

```python
from composio import Action, App
from crewai_tools import ComposioTool
from crewai import Agent, Task


tools = [ComposioTool.from_action(action=Action.GITHUB_ACTIVITY_STAR_REPO_FOR_AUTHENTICATED_USER)]
```

If you don't know which action you want to use, use `from_app` with a `tags` filter to get relevant actions

```python
tools = ComposioTool.from_app(App.GITHUB, tags=["important"])
```

or use `use_case` to search relevant actions

```python
tools = ComposioTool.from_app(App.GITHUB, use_case="Star a github repository")
```

2. Define agent

```python
crewai_agent = Agent(
    role="Github Agent",
    goal="You take action on Github using Github APIs",
    backstory=(
        "You are AI agent that is responsible for taking actions on Github "
        "on users behalf. You need to take action on Github using Github APIs"
    ),
    verbose=True,
    tools=tools,
)
```

3. Execute task

```python
task = Task(
    description="Star a repo ComposioHQ/composio on GitHub",
    agent=crewai_agent,
    expected_output="if the star happened",
)

task.execute()
```

* A more detailed list of tools can be found [here](https://app.composio.dev)
122  src/crewai_tools/tools/composio_tool/composio_tool.py  Normal file
@@ -0,0 +1,122 @@
"""
Composio tools wrapper.
"""

import typing as t

import typing_extensions as te

from crewai_tools.tools.base_tool import BaseTool


class ComposioTool(BaseTool):
    """Wrapper for composio tools."""

    composio_action: t.Callable

    def _run(self, *args: t.Any, **kwargs: t.Any) -> t.Any:
        """Run the composio action with given arguments."""
        return self.composio_action(*args, **kwargs)

    @staticmethod
    def _check_connected_account(tool: t.Any, toolset: t.Any) -> None:
        """Check if connected account is required and if required it exists or not."""
        from composio import Action
        from composio.client.collections import ConnectedAccountModel

        tool = t.cast(Action, tool)
        if tool.no_auth:
            return

        connections = t.cast(
            t.List[ConnectedAccountModel],
            toolset.client.connected_accounts.get(),
        )
        if tool.app not in [connection.appUniqueId for connection in connections]:
            raise RuntimeError(
                f"No connected account found for app `{tool.app}`; "
                f"Run `composio add {tool.app}` to fix this"
            )

    @classmethod
    def from_action(
        cls,
        action: t.Any,
        **kwargs: t.Any,
    ) -> te.Self:
        """Wrap a composio tool as crewAI tool."""

        from composio import Action, ComposioToolSet
        from composio.constants import DEFAULT_ENTITY_ID
        from composio.utils.shared import json_schema_to_model

        toolset = ComposioToolSet()
        if not isinstance(action, Action):
            action = Action(action)

        action = t.cast(Action, action)
        cls._check_connected_account(
            tool=action,
            toolset=toolset,
        )

        (action_schema,) = toolset.get_action_schemas(actions=[action])
        schema = action_schema.model_dump(exclude_none=True)
        entity_id = kwargs.pop("entity_id", DEFAULT_ENTITY_ID)

        def function(**kwargs: t.Any) -> t.Dict:
            """Wrapper function for composio action."""
            return toolset.execute_action(
                action=Action(schema["name"]),
                params=kwargs,
                entity_id=entity_id,
            )

        function.__name__ = schema["name"]
        function.__doc__ = schema["description"]

        return cls(
            name=schema["name"],
            description=schema["description"],
            args_schema=json_schema_to_model(
                action_schema.parameters.model_dump(
                    exclude_none=True,
                )
            ),
            composio_action=function,
            **kwargs,
        )

    @classmethod
    def from_app(
        cls,
        *apps: t.Any,
        tags: t.Optional[t.List[str]] = None,
        use_case: t.Optional[str] = None,
        **kwargs: t.Any,
    ) -> t.List[te.Self]:
        """Create toolset from an app."""
        if len(apps) == 0:
            raise ValueError("You need to provide at least one app name")

        if use_case is None and tags is None:
            raise ValueError("Both `use_case` and `tags` cannot be `None`")

        if use_case is not None and tags is not None:
            raise ValueError(
                "Cannot use both `use_case` and `tags` to filter the actions"
            )

        from composio import ComposioToolSet

        toolset = ComposioToolSet()
        if use_case is not None:
            return [
                cls.from_action(action=action, **kwargs)
                for action in toolset.find_actions_by_use_case(*apps, use_case=use_case)
            ]

        return [
            cls.from_action(action=action, **kwargs)
            for action in toolset.find_actions_by_tags(*apps, tags=tags)
        ]
@@ -57,4 +57,4 @@ class CSVSearchTool(RagTool):
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
        return super()._run(query=search_query, **kwargs)
@@ -57,4 +57,4 @@ class DirectorySearchTool(RagTool):
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
        return super()._run(query=search_query, **kwargs)
@@ -63,4 +63,4 @@ class DOCXSearchTool(RagTool):
        docx = kwargs.get("docx")
        if docx is not None:
            self.add(docx)
        return super()._run(query=search_query)
        return super()._run(query=search_query, **kwargs)
@@ -0,0 +1,42 @@
# FirecrawlCrawlWebsiteTool

## Description

[Firecrawl](https://firecrawl.dev) is a platform for crawling and converting any website into clean markdown or structured data.

## Installation

- Get an API key from [firecrawl.dev](https://firecrawl.dev) and set it in environment variables (`FIRECRAWL_API_KEY`).
- Install the [Firecrawl SDK](https://github.com/mendableai/firecrawl) along with the `crewai[tools]` package:

```
pip install firecrawl-py 'crewai[tools]'
```

## Example

Utilize the FirecrawlCrawlWebsiteTool as follows to allow your agent to crawl websites:

```python
from crewai_tools import FirecrawlCrawlWebsiteTool

tool = FirecrawlCrawlWebsiteTool(url='firecrawl.dev')
```

## Arguments

- `api_key`: Optional. Specifies the Firecrawl API key. Defaults to the `FIRECRAWL_API_KEY` environment variable.
- `url`: The base URL to start crawling from.
- `page_options`: Optional.
  - `onlyMainContent`: Optional. Only return the main content of the page, excluding headers, navs, footers, etc.
  - `includeHtml`: Optional. Include the raw HTML content of the page. Will output an html key in the response.
- `crawler_options`: Optional. Options for controlling the crawling behavior (see the sketch after this list).
  - `includes`: Optional. URL patterns to include in the crawl.
  - `exclude`: Optional. URL patterns to exclude from the crawl.
  - `generateImgAltText`: Optional. Generate alt text for images using LLMs (requires a paid plan).
  - `returnOnlyUrls`: Optional. If true, returns only the URLs as a list in the crawl status. Note: the response will be a list of URLs inside the data, not a list of documents.
  - `maxDepth`: Optional. Maximum depth to crawl. Depth 1 is the base URL, depth 2 includes the base URL and its direct children, and so on.
  - `mode`: Optional. The crawling mode to use. Fast mode crawls 4x faster on websites without a sitemap but may not be as accurate and shouldn't be used on heavily JavaScript-rendered websites.
  - `limit`: Optional. Maximum number of pages to crawl.
  - `timeout`: Optional. Timeout in milliseconds for the crawling operation.
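As referenced in the argument list above, `crawler_options` and `page_options` are passed straight through to Firecrawl's `crawl_url` call. A minimal sketch of a direct invocation, assuming `FIRECRAWL_API_KEY` is set; the URL and option values are illustrative, not taken from the original README:

```python
from crewai_tools import FirecrawlCrawlWebsiteTool

tool = FirecrawlCrawlWebsiteTool()

# Limit the crawl depth and page count, and keep only the main content of each page.
result = tool._run(
    url="https://firecrawl.dev",
    crawler_options={"maxDepth": 2, "limit": 10},
    page_options={"onlyMainContent": True},
)
```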
@@ -0,0 +1,33 @@
from typing import Optional, Any, Type, Dict, List
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool


class FirecrawlCrawlWebsiteToolSchema(BaseModel):
    url: str = Field(description="Website URL")
    crawler_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for crawling")
    page_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for page")


class FirecrawlCrawlWebsiteTool(BaseTool):
    name: str = "Firecrawl web crawl tool"
    description: str = "Crawl webpages using Firecrawl and return the contents"
    args_schema: Type[BaseModel] = FirecrawlCrawlWebsiteToolSchema
    api_key: Optional[str] = None
    firecrawl: Optional[Any] = None

    def __init__(self, api_key: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        try:
            from firecrawl import FirecrawlApp  # type: ignore
        except ImportError:
            raise ImportError(
                "`firecrawl` package not found, please run `pip install firecrawl-py`"
            )

        self.firecrawl = FirecrawlApp(api_key=api_key)

    def _run(self, url: str, crawler_options: Optional[Dict[str, Any]] = None, page_options: Optional[Dict[str, Any]] = None):
        options = {
            "crawlerOptions": crawler_options,
            "pageOptions": page_options
        }
        return self.firecrawl.crawl_url(url, options)
@@ -0,0 +1,38 @@
# FirecrawlScrapeWebsiteTool

## Description

[Firecrawl](https://firecrawl.dev) is a platform for crawling and converting any website into clean markdown or structured data.

## Installation

- Get an API key from [firecrawl.dev](https://firecrawl.dev) and set it in environment variables (`FIRECRAWL_API_KEY`).
- Install the [Firecrawl SDK](https://github.com/mendableai/firecrawl) along with the `crewai[tools]` package:

```
pip install firecrawl-py 'crewai[tools]'
```

## Example

Utilize the FirecrawlScrapeWebsiteTool as follows to allow your agent to scrape websites:

```python
from crewai_tools import FirecrawlScrapeWebsiteTool

tool = FirecrawlScrapeWebsiteTool(url='firecrawl.dev')
```

## Arguments

- `api_key`: Optional. Specifies the Firecrawl API key. Defaults to the `FIRECRAWL_API_KEY` environment variable.
- `url`: The URL to scrape.
- `page_options`: Optional.
  - `onlyMainContent`: Optional. Only return the main content of the page, excluding headers, navs, footers, etc.
  - `includeHtml`: Optional. Include the raw HTML content of the page. Will output an html key in the response.
- `extractor_options`: Optional. Options for LLM-based extraction of structured information from the page content (see the sketch after this list).
  - `mode`: The extraction mode to use, currently supports 'llm-extraction'.
  - `extractionPrompt`: Optional. A prompt describing what information to extract from the page.
  - `extractionSchema`: Optional. The schema for the data to be extracted.
- `timeout`: Optional. Timeout in milliseconds for the request.
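As noted in the argument list above, `extractor_options` drives the LLM-based extraction mode. A minimal sketch, assuming `FIRECRAWL_API_KEY` is set; the URL, prompt, and timeout are illustrative:

```python
from crewai_tools import FirecrawlScrapeWebsiteTool

tool = FirecrawlScrapeWebsiteTool()

result = tool._run(
    url="https://firecrawl.dev",
    page_options={"onlyMainContent": True},
    extractor_options={
        "mode": "llm-extraction",
        "extractionPrompt": "Extract the product name and pricing tiers",
    },
    timeout=30000,  # milliseconds
)
```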
@@ -0,0 +1,35 @@
from typing import Optional, Any, Type, Dict
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool


class FirecrawlScrapeWebsiteToolSchema(BaseModel):
    url: str = Field(description="Website URL")
    page_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for page scraping")
    extractor_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for data extraction")
    timeout: Optional[int] = Field(default=None, description="Timeout for the scraping operation")


class FirecrawlScrapeWebsiteTool(BaseTool):
    name: str = "Firecrawl web scrape tool"
    description: str = "Scrape webpages url using Firecrawl and return the contents"
    args_schema: Type[BaseModel] = FirecrawlScrapeWebsiteToolSchema
    api_key: Optional[str] = None
    firecrawl: Optional[Any] = None

    def __init__(self, api_key: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        try:
            from firecrawl import FirecrawlApp  # type: ignore
        except ImportError:
            raise ImportError(
                "`firecrawl` package not found, please run `pip install firecrawl-py`"
            )

        self.firecrawl = FirecrawlApp(api_key=api_key)

    def _run(self, url: str, page_options: Optional[Dict[str, Any]] = None, extractor_options: Optional[Dict[str, Any]] = None, timeout: Optional[int] = None):
        options = {
            "pageOptions": page_options,
            "extractorOptions": extractor_options,
            "timeout": timeout
        }
        return self.firecrawl.scrape_url(url, options)
35  src/crewai_tools/tools/firecrawl_search_tool/README.md  Normal file
@@ -0,0 +1,35 @@
# FirecrawlSearchTool

## Description

[Firecrawl](https://firecrawl.dev) is a platform for crawling and converting any website into clean markdown or structured data.

## Installation

- Get an API key from [firecrawl.dev](https://firecrawl.dev) and set it in environment variables (`FIRECRAWL_API_KEY`).
- Install the [Firecrawl SDK](https://github.com/mendableai/firecrawl) along with the `crewai[tools]` package:

```
pip install firecrawl-py 'crewai[tools]'
```

## Example

Utilize the FirecrawlSearchTool as follows to allow your agent to search the web:

```python
from crewai_tools import FirecrawlSearchTool

tool = FirecrawlSearchTool(query='what is firecrawl?')
```

## Arguments

- `api_key`: Optional. Specifies the Firecrawl API key. Defaults to the `FIRECRAWL_API_KEY` environment variable.
- `query`: The search query string to be used for searching.
- `page_options`: Optional. Options for result formatting (see the sketch after this list).
  - `onlyMainContent`: Optional. Only return the main content of the page, excluding headers, navs, footers, etc.
  - `includeHtml`: Optional. Include the raw HTML content of the page. Will output an html key in the response.
  - `fetchPageContent`: Optional. Fetch the full content of the page.
- `search_options`: Optional. Options for controlling the search behavior.
  - `limit`: Optional. Maximum number of pages to crawl.
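As with the other Firecrawl tools, the option dictionaries are passed straight through to the SDK. A minimal sketch of a direct call, assuming `FIRECRAWL_API_KEY` is set; the query and options are illustrative:

```python
from crewai_tools import FirecrawlSearchTool

tool = FirecrawlSearchTool()

results = tool._run(
    query="what is firecrawl?",
    page_options={"fetchPageContent": True},
)
```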
@@ -0,0 +1,33 @@
from typing import Optional, Any, Type, Dict, List
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool


class FirecrawlSearchToolSchema(BaseModel):
    query: str = Field(description="Search query")
    page_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for result formatting")
    search_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for searching")


class FirecrawlSearchTool(BaseTool):
    name: str = "Firecrawl web search tool"
    description: str = "Search webpages using Firecrawl and return the results"
    args_schema: Type[BaseModel] = FirecrawlSearchToolSchema
    api_key: Optional[str] = None
    firecrawl: Optional[Any] = None

    def __init__(self, api_key: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        try:
            from firecrawl import FirecrawlApp  # type: ignore
        except ImportError:
            raise ImportError(
                "`firecrawl` package not found, please run `pip install firecrawl-py`"
            )

        self.firecrawl = FirecrawlApp(api_key=api_key)

    def _run(self, query: str, page_options: Optional[Dict[str, Any]] = None, result_options: Optional[Dict[str, Any]] = None):
        options = {
            "pageOptions": page_options,
            "resultOptions": result_options
        }
        return self.firecrawl.search(query, options)
@@ -68,4 +68,4 @@ class GithubSearchTool(RagTool):
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
        return super()._run(query=search_query, **kwargs)
@@ -57,4 +57,4 @@ class JSONSearchTool(RagTool):
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
        return super()._run(query=search_query, **kwargs)
53  src/crewai_tools/tools/llamaindex_tool/README.md  Normal file
@@ -0,0 +1,53 @@
# LlamaIndexTool Documentation

## Description
This tool is designed to be a general wrapper around LlamaIndex tools and query engines, enabling you to leverage LlamaIndex resources (RAG and agentic pipelines) as tools that plug into CrewAI agents.

## Installation
To incorporate this tool into your project, follow the installation instructions below:
```shell
pip install 'crewai[tools]'
```

## Example
The following example demonstrates how to initialize the tool and execute a search with a given query:

```python
from crewai_tools import LlamaIndexTool

# Initialize the tool from a LlamaIndex Tool

## Example 1: Initialize from FunctionTool
from llama_index.core.tools import FunctionTool

your_python_function = lambda ...: ...
og_tool = FunctionTool.from_defaults(your_python_function, name="<name>", description='<description>')
tool = LlamaIndexTool.from_tool(og_tool)

## Example 2: Initialize from LlamaHub Tools
from llama_index.tools.wolfram_alpha import WolframAlphaToolSpec
wolfram_spec = WolframAlphaToolSpec(app_id="<app_id>")
wolfram_tools = wolfram_spec.to_tool_list()
tools = [LlamaIndexTool.from_tool(t) for t in wolfram_tools]


# Initialize Tool from a LlamaIndex Query Engine

## NOTE: LlamaIndex has a lot of query engines, define whatever query engine you want
query_engine = index.as_query_engine()
query_tool = LlamaIndexTool.from_query_engine(
    query_engine,
    name="Uber 2019 10K Query Tool",
    description="Use this tool to lookup the 2019 Uber 10K Annual Report"
)

```

## Steps to Get Started
To effectively use the `LlamaIndexTool`, follow these steps:

1. **Install CrewAI**: Confirm that the `crewai[tools]` package is installed in your Python environment.
2. **Install and use LlamaIndex**: Follow the LlamaIndex documentation (https://docs.llamaindex.ai/) to set up a RAG/agent pipeline.
84  src/crewai_tools/tools/llamaindex_tool/llamaindex_tool.py  Normal file
@@ -0,0 +1,84 @@
import os
import json
import requests

from typing import Type, Any, cast, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool


class LlamaIndexTool(BaseTool):
    """Tool to wrap LlamaIndex tools/query engines."""

    llama_index_tool: Any

    def _run(
        self,
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Run tool."""
        from llama_index.core.tools import BaseTool as LlamaBaseTool

        tool = cast(LlamaBaseTool, self.llama_index_tool)
        return tool(*args, **kwargs)

    @classmethod
    def from_tool(
        cls,
        tool: Any,
        **kwargs: Any
    ) -> "LlamaIndexTool":
        from llama_index.core.tools import BaseTool as LlamaBaseTool

        if not isinstance(tool, LlamaBaseTool):
            raise ValueError(f"Expected a LlamaBaseTool, got {type(tool)}")
        tool = cast(LlamaBaseTool, tool)

        if tool.metadata.fn_schema is None:
            raise ValueError("The LlamaIndex tool does not have an fn_schema specified.")
        args_schema = cast(Type[BaseModel], tool.metadata.fn_schema)

        return cls(
            name=tool.metadata.name,
            description=tool.metadata.description,
            args_schema=args_schema,
            llama_index_tool=tool,
            **kwargs
        )

    @classmethod
    def from_query_engine(
        cls,
        query_engine: Any,
        name: Optional[str] = None,
        description: Optional[str] = None,
        return_direct: bool = False,
        **kwargs: Any
    ) -> "LlamaIndexTool":
        from llama_index.core.query_engine import BaseQueryEngine
        from llama_index.core.tools import QueryEngineTool

        if not isinstance(query_engine, BaseQueryEngine):
            raise ValueError(f"Expected a BaseQueryEngine, got {type(query_engine)}")

        # NOTE: by default the schema expects an `input` variable. However this
        # confuses crewAI so we are renaming to `query`.
        class QueryToolSchema(BaseModel):
            """Schema for query tool."""

            query: str = Field(..., description="Search query for the query tool.")

        # NOTE: setting `resolve_input_errors` to True is important because the schema expects `input` but we are using `query`
        query_engine_tool = QueryEngineTool.from_defaults(
            query_engine,
            name=name,
            description=description,
            return_direct=return_direct,
            resolve_input_errors=True,
        )
        # HACK: we are replacing the schema with our custom schema
        query_engine_tool.metadata.fn_schema = QueryToolSchema

        return cls.from_tool(
            query_engine_tool,
            **kwargs
        )
@@ -57,4 +57,4 @@ class MDXSearchTool(RagTool):
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
        return super()._run(query=search_query, **kwargs)
54  src/crewai_tools/tools/multion_tool/README.md  Normal file
@@ -0,0 +1,54 @@
# MultiOnTool Documentation

## Description
The MultiOnTool, integrated within the crewai_tools package, empowers CrewAI agents with the capability to navigate and interact with the web through natural language instructions. Leveraging the MultiOn API, this tool facilitates seamless web browsing, making it an essential asset for projects requiring dynamic web data interaction.

## Installation
Ensure the `crewai[tools]` package is installed in your environment to use the MultiOnTool. If it's not already installed, you can add it using the command below:
```shell
pip install 'crewai[tools]'
```

## Example
The following example demonstrates how to initialize the tool and execute a search with a given query:

```python
from crewai import Agent, Task, Crew
from crewai_tools import MultiOnTool

# Initialize the tool from a MultiOn Tool
multion_tool = MultiOnTool(api_key="YOUR_MULTION_API_KEY", local=False)

Browser = Agent(
    role="Browser Agent",
    goal="control web browsers using natural language ",
    backstory="An expert browsing agent.",
    tools=[multion_tool],
    verbose=True,
)

# example task to search and summarize news
browse = Task(
    description="Summarize the top 3 trending AI News headlines",
    expected_output="A summary of the top 3 trending AI News headlines",
    agent=Browser,
)

crew = Crew(agents=[Browser], tasks=[browse])

crew.kickoff()
```

## Arguments

- `api_key`: Specifies the MultiOn API key. Defaults to the `MULTION_API_KEY` environment variable.
- `local`: Set the local flag to "true" to run the agent locally in your browser. Make sure the MultiOn browser extension is installed and API Enabled is checked.
- `max_steps`: Optional. Sets the max_steps the MultiOn agent can take for a command.

## Steps to Get Started
To effectively use the `MultiOnTool`, follow these steps:

1. **Install CrewAI**: Confirm that the `crewai[tools]` package is installed in your Python environment.
2. **Install and use MultiOn**: Follow the MultiOn documentation for installing the MultiOn Browser Extension (https://docs.multion.ai/learn/browser-extension).
3. **Enable API Usage**: Click on the MultiOn extension in the extensions folder of your browser (not the hovering MultiOn icon on the web page) to open the extension configurations. Click the API Enabled toggle to enable the API.
29  src/crewai_tools/tools/multion_tool/example.py  Normal file
@@ -0,0 +1,29 @@
import os

from crewai import Agent, Crew, Task
from multion_tool import MultiOnTool

os.environ["OPENAI_API_KEY"] = "Your Key"

multion_browse_tool = MultiOnTool(api_key="Your Key")

# Create a new agent
Browser = Agent(
    role="Browser Agent",
    goal="control web browsers using natural language ",
    backstory="An expert browsing agent.",
    tools=[multion_browse_tool],
    verbose=True,
)

# Define tasks
browse = Task(
    description="Summarize the top 3 trending AI News headlines",
    expected_output="A summary of the top 3 trending AI News headlines",
    agent=Browser,
)


crew = Crew(agents=[Browser], tasks=[browse])

crew.kickoff()
65  src/crewai_tools/tools/multion_tool/multion_tool.py  Normal file
@@ -0,0 +1,65 @@
"""Multion tool spec."""

from typing import Any, Optional

from crewai_tools.tools.base_tool import BaseTool


class MultiOnTool(BaseTool):
    """Tool to wrap MultiOn Browse Capabilities."""

    name: str = "Multion Browse Tool"
    description: str = """Multion gives the ability for LLMs to control web browsers using natural language instructions.
            If the status is 'CONTINUE', reissue the same instruction to continue execution
        """
    multion: Optional[Any] = None
    session_id: Optional[str] = None
    local: bool = False
    max_steps: int = 3

    def __init__(
        self,
        api_key: Optional[str] = None,
        local: bool = False,
        max_steps: int = 3,
        **kwargs,
    ):
        super().__init__(**kwargs)
        try:
            from multion.client import MultiOn  # type: ignore
        except ImportError:
            raise ImportError(
                "`multion` package not found, please run `pip install multion`"
            )
        self.session_id = None
        self.local = local
        self.multion = MultiOn(api_key=api_key)
        self.max_steps = max_steps

    def _run(
        self,
        cmd: str,
        *args: Any,
        **kwargs: Any,
    ) -> str:
        """
        Run the Multion client with the given command.

        Args:
            cmd (str): The detailed and specific natural language instruction for web browsing

            *args (Any): Additional arguments to pass to the Multion client
            **kwargs (Any): Additional keyword arguments to pass to the Multion client
        """

        browse = self.multion.browse(
            cmd=cmd,
            session_id=self.session_id,
            local=self.local,
            max_steps=self.max_steps,
            *args,
            **kwargs,
        )
        self.session_id = browse.session_id

        return browse.message + "\n\n STATUS: " + browse.status
@@ -1,6 +1,7 @@
from typing import Any, Optional, Type

from embedchain.models.data_type import DataType
from pydantic import model_validator
from pydantic.v1 import BaseModel, Field

from ..rag.rag_tool import RagTool
@@ -35,6 +36,22 @@ class PDFSearchTool(RagTool):
        self.args_schema = FixedPDFSearchToolSchema
        self._generate_description()

    @model_validator(mode="after")
    def _set_default_adapter(self):
        if isinstance(self.adapter, RagTool._AdapterPlaceholder):
            from embedchain import App

            from crewai_tools.adapters.pdf_embedchain_adapter import (
                PDFEmbedchainAdapter,
            )

            app = App.from_config(config=self.config) if self.config else App()
            self.adapter = PDFEmbedchainAdapter(
                embedchain_app=app, summarize=self.summarize
            )

        return self

    def add(
        self,
        *args: Any,
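For context, a hedged sketch of how this new default adapter gets exercised, assuming the usual `PDFSearchTool` constructor that accepts a `pdf` path (the file name and query are illustrative):

```python
from crewai_tools import PDFSearchTool

# The validator above wires in a PDFEmbedchainAdapter automatically, so plain
# construction is enough; summarize=True makes the adapter return the answer
# instead of the raw source chunks.
tool = PDFSearchTool(pdf="example.pdf", summarize=True)
print(tool.run(search_query="What is the refund policy?"))
```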
@@ -0,0 +1,66 @@
from typing import Any, Optional, Type
from pydantic import BaseModel, Field
from pypdf import PdfReader, PdfWriter, PageObject, ContentStream, NameObject, Font
from pathlib import Path

from crewai_tools.tools.rag.rag_tool import RagTool


class PDFTextWritingToolSchema(BaseModel):
    """Input schema for PDFTextWritingTool."""

    pdf_path: str = Field(..., description="Path to the PDF file to modify")
    text: str = Field(..., description="Text to add to the PDF")
    position: tuple = Field(..., description="Tuple of (x, y) coordinates for text placement")
    font_size: int = Field(default=12, description="Font size of the text")
    font_color: str = Field(default="0 0 0 rg", description="RGB color code for the text")
    font_name: Optional[str] = Field(default="F1", description="Font name for standard fonts")
    font_file: Optional[str] = Field(None, description="Path to a .ttf font file for custom font usage")
    page_number: int = Field(default=0, description="Page number to add text to")


class PDFTextWritingTool(RagTool):
    """A tool to add text to specific positions in a PDF, with custom font support."""

    name: str = "PDF Text Writing Tool"
    description: str = "A tool that can write text to a specific position in a PDF document, with optional custom font embedding."
    args_schema: Type[BaseModel] = PDFTextWritingToolSchema

    def run(self, pdf_path: str, text: str, position: tuple, font_size: int, font_color: str,
            font_name: str = "F1", font_file: Optional[str] = None, page_number: int = 0, **kwargs) -> str:
        reader = PdfReader(pdf_path)
        writer = PdfWriter()

        if page_number >= len(reader.pages):
            return "Page number out of range."

        page: PageObject = reader.pages[page_number]
        content = ContentStream(page["/Contents"].data, reader)

        if font_file:
            # Check if the font file exists
            if not Path(font_file).exists():
                return "Font file does not exist."

            # Embed the custom font
            font_name = self.embed_font(writer, font_file)

        # Prepare text operation with the custom or standard font
        x_position, y_position = position
        text_operation = f"BT /{font_name} {font_size} Tf {x_position} {y_position} Td ({text}) Tj ET"
        content.operations.append([font_color])  # Set color
        content.operations.append([text_operation])  # Add text

        # Replace old content with new content
        page[NameObject("/Contents")] = content
        writer.add_page(page)

        # Save the new PDF
        output_pdf_path = "modified_output.pdf"
        with open(output_pdf_path, "wb") as out_file:
            writer.write(out_file)

        return f"Text added to {output_pdf_path} successfully."

    def embed_font(self, writer: PdfWriter, font_file: str) -> str:
        """Embeds a TTF font into the PDF and returns the font name."""
        with open(font_file, "rb") as file:
            font = Font.true_type(file.read())
        font_ref = writer.add_object(font)
        return font_ref
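A hedged sketch of invoking this tool directly, assuming the implementation above works against your pypdf version; the file name, text, and coordinates are illustrative, and no import path is assumed because the new file's path is not shown in this diff:

```python
# Continuing in the same module as the class defined above.
tool = PDFTextWritingTool()
result = tool.run(
    pdf_path="input.pdf",      # illustrative input file
    text="CONFIDENTIAL",
    position=(72, 720),        # x, y in PDF points from the bottom-left corner
    font_size=14,
    font_color="0 0 0 rg",     # black, in PDF colour-operator syntax
)
print(result)  # e.g. "Text added to modified_output.pdf successfully."
```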
@@ -41,4 +41,4 @@ class PGSearchTool(RagTool):
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
        return super()._run(query=search_query, **kwargs)
@@ -25,8 +25,7 @@ class ScrapeWebsiteTool(BaseTool):
        'Accept-Language': 'en-US,en;q=0.9',
        'Referer': 'https://www.google.com/',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Accept-Encoding': 'gzip, deflate, br'
        'Upgrade-Insecure-Requests': '1'
    }

    def __init__(self, website_url: Optional[str] = None, cookies: Optional[dict] = None, **kwargs):
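The hunk above adjusts the default request headers and adds an optional `cookies` parameter to the `ScrapeWebsiteTool` constructor. A hedged sketch of how it might be called; the URL and cookie payload are illustrative, and the exact cookie handling lives in code not shown in this hunk:

```python
from crewai_tools import ScrapeWebsiteTool

tool = ScrapeWebsiteTool(
    website_url="https://example.com/account",
    cookies={"name": "session_id", "value": "SESSION_COOKIE_ENV_VAR"},  # illustrative shape
)
print(tool.run())
```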
@@ -0,0 +1,57 @@
# ScrapflyScrapeWebsiteTool

## Description
[ScrapFly](https://scrapfly.io/) is a web scraping API with headless browser capabilities, proxies, and anti-bot bypass. It allows for extracting web page data into LLM-accessible markdown or text.

## Setup and Installation
1. **Install ScrapFly Python SDK**: Install the `scrapfly-sdk` Python package to use the ScrapFly Web Loader. Install it via pip with the following command:

```bash
pip install scrapfly-sdk
```

2. **API Key**: Register for free at [scrapfly.io/register](https://www.scrapfly.io/register/) to obtain your API key.

## Example Usage

Utilize the ScrapflyScrapeWebsiteTool as follows to retrieve web page data as text, markdown (LLM accessible), or HTML:

```python
from crewai_tools import ScrapflyScrapeWebsiteTool

tool = ScrapflyScrapeWebsiteTool(
    api_key="Your ScrapFly API key"
)

result = tool._run(
    url="https://web-scraping.dev/products",
    scrape_format="markdown",
    ignore_scrape_failures=True
)
```

## Additional Arguments
The ScrapflyScrapeWebsiteTool also allows passing a ScrapeConfig object for customizing the scrape request. See the [API params documentation](https://scrapfly.io/docs/scrape-api/getting-started) for the full feature details and their API params:
```python
from crewai_tools import ScrapflyScrapeWebsiteTool

tool = ScrapflyScrapeWebsiteTool(
    api_key="Your ScrapFly API key"
)

scrapfly_scrape_config = {
    "asp": True,  # Bypass scraping blocking and anti-bot solutions, like Cloudflare
    "render_js": True,  # Enable JavaScript rendering with a cloud headless browser
    "proxy_pool": "public_residential_pool",  # Select a proxy pool (datacenter or residential)
    "country": "us",  # Select a proxy location
    "auto_scroll": True,  # Auto scroll the page
    "js": ""  # Execute custom JavaScript code by the headless browser
}

result = tool._run(
    url="https://web-scraping.dev/products",
    scrape_format="markdown",
    ignore_scrape_failures=True,
    scrape_config=scrapfly_scrape_config
)
```
@@ -0,0 +1,47 @@
import logging

from typing import Optional, Any, Type, Dict, Literal
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool

logger = logging.getLogger(__file__)


class ScrapflyScrapeWebsiteToolSchema(BaseModel):
    url: str = Field(description="Webpage URL")
    scrape_format: Optional[Literal["raw", "markdown", "text"]] = Field(default="markdown", description="Webpage extraction format")
    scrape_config: Optional[Dict[str, Any]] = Field(default=None, description="Scrapfly request scrape config")
    ignore_scrape_failures: Optional[bool] = Field(default=None, description="whether to ignore failures")


class ScrapflyScrapeWebsiteTool(BaseTool):
    name: str = "Scrapfly web scraping API tool"
    description: str = "Scrape a webpage url using Scrapfly and return its content as markdown or text"
    args_schema: Type[BaseModel] = ScrapflyScrapeWebsiteToolSchema
    api_key: str = None
    scrapfly: Optional[Any] = None

    def __init__(self, api_key: str):
        super().__init__()
        try:
            from scrapfly import ScrapflyClient
        except ImportError:
            raise ImportError(
                "`scrapfly` package not found, please run `pip install scrapfly-sdk`"
            )
        self.scrapfly = ScrapflyClient(key=api_key)

    def _run(self, url: str, scrape_format: str = "markdown", scrape_config: Optional[Dict[str, Any]] = None, ignore_scrape_failures: Optional[bool] = None):
        from scrapfly import ScrapeApiResponse, ScrapeConfig

        scrape_config = scrape_config if scrape_config is not None else {}
        try:
            response: ScrapeApiResponse = self.scrapfly.scrape(
                ScrapeConfig(url, format=scrape_format, **scrape_config)
            )
            return response.scrape_result["content"]
        except Exception as e:
            if ignore_scrape_failures:
                logger.error(f"Error fetching data from {url}, exception: {e}")
                return None
            else:
                raise e
@@ -5,7 +5,7 @@ from pydantic.v1 import BaseModel, Field
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.chrome.options import Options

from ..base_tool import BaseTool
@@ -6,6 +6,14 @@ from typing import Optional, Type, Any
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool

def _save_results_to_file(content: str) -> None:
    """Saves the search results to a file."""
    filename = f"search_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
    with open(filename, 'w') as file:
        file.write(content)
    print(f"Results saved to {filename}")


class SerperDevToolSchema(BaseModel):
    """Input for SerperDevTool."""
    search_query: str = Field(..., description="Mandatory search query you want to use to search the internet")
@@ -15,26 +23,31 @@
    description: str = "A tool that can be used to search the internet with a search_query."
    args_schema: Type[BaseModel] = SerperDevToolSchema
    search_url: str = "https://google.serper.dev/search"
    n_results: int = 10
    country: Optional[str] = None
    location: Optional[str] = None
    locale: Optional[str] = None
    n_results: int = Field(default=10, description="Number of search results to return")
    save_file: bool = Field(default=False, description="Flag to determine whether to save the results to a file")

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:

        search_query = kwargs.get('search_query') or kwargs.get('query')
        save_file = kwargs.get('save_file', self.save_file)
        n_results = kwargs.get('n_results', self.n_results)

        payload = json.dumps(
            {
                "q": search_query,
                "num": self.n_results,
                "num": n_results,
                "gl": self.country,
                "location": self.location,
                "hl": self.locale,
            }
        )

        headers = {
            'X-API-KEY': os.environ['SERPER_API_KEY'],
            'content-type': 'application/json'
@@ -42,7 +55,7 @@ class SerperDevTool(BaseTool):
        response = requests.request("POST", self.search_url, headers=headers, data=payload)
        results = response.json()
        if 'organic' in results:
            results = results['organic']
            results = results['organic'][:self.n_results]
            string = []
            for result in results:
                try:
@@ -53,9 +66,11 @@ class SerperDevTool(BaseTool):
                        "---"
                    ]))
                except KeyError:
                    next
                    continue

            content = '\n'.join(string)
            if save_file:
                _save_results_to_file(content)
            return f"\nSearch results: {content}\n"
        else:
            return results
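Since this is the change the branch name refers to, a short hedged sketch of exercising the new payload parameters; the parameter values and query are illustrative, and `SERPER_API_KEY` is assumed to be set:

```python
from crewai_tools import SerperDevTool

# country/location/locale map to Serper's gl/location/hl payload fields, and
# n_results/save_file can also be overridden per call via kwargs.
tool = SerperDevTool(
    country="us",
    location="Austin, Texas, United States",
    locale="en",
    n_results=5,
    save_file=False,
)
print(tool.run(search_query="latest LLM evaluation benchmarks"))
```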
117  src/crewai_tools/tools/serply_api_tool/README.md  Normal file
@@ -0,0 +1,117 @@
|
||||
# Serply API Documentation
|
||||
|
||||
## Description
|
||||
This tool is designed to perform a web/news/scholar search for a specified query from a text's content across the internet. It utilizes the [Serply.io](https://serply.io) API to fetch and display the most relevant search results based on the query provided by the user.
|
||||
|
||||
## Installation
|
||||
|
||||
To incorporate this tool into your project, follow the installation instructions below:
|
||||
```shell
|
||||
pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
||||
## Web Search
|
||||
The following example demonstrates how to initialize the tool and execute a search the web with a given query:
|
||||
|
||||
```python
|
||||
from crewai_tools import SerplyWebSearchTool
|
||||
|
||||
# Initialize the tool for internet searching capabilities
|
||||
tool = SerplyWebSearchTool()
|
||||
|
||||
# increase search limits to 100 results
|
||||
tool = SerplyWebSearchTool(limit=100)
|
||||
|
||||
|
||||
# change results language (fr - French)
|
||||
tool = SerplyWebSearchTool(hl="fr")
|
||||
```
|
||||
|
||||
## News Search
|
||||
The following example demonstrates how to initialize the tool and execute a search news with a given query:
|
||||
|
||||
```python
|
||||
from crewai_tools import SerplyNewsSearchTool
|
||||
|
||||
# Initialize the tool for internet searching capabilities
|
||||
tool = SerplyNewsSearchTool()
|
||||
|
||||
# change country news (JP - Japan)
|
||||
tool = SerplyNewsSearchTool(proxy_location="JP")
|
||||
```
|
||||
|
||||
## Scholar Search
|
||||
The following example demonstrates how to initialize the tool and execute a search scholar articles a given query:
|
||||
|
||||
```python
|
||||
from crewai_tools import SerplyScholarSearchTool
|
||||
|
||||
# Initialize the tool for internet searching capabilities
|
||||
tool = SerplyScholarSearchTool()
|
||||
|
||||
# change country news (GB - Great Britain)
|
||||
tool = SerplyScholarSearchTool(proxy_location="GB")
|
||||
```
|
||||
|
||||
## Job Search
The following example demonstrates how to initialize the tool and search for jobs in the USA:

```python
from crewai_tools import SerplyJobSearchTool

# Initialize the tool for internet searching capabilities
tool = SerplyJobSearchTool()
```

## Web Page To Markdown
The following example demonstrates how to initialize the tool, fetch a web page, and convert it to markdown:

```python
from crewai_tools import SerplyWebpageToMarkdownTool

# Initialize the tool
tool = SerplyWebpageToMarkdownTool()

# change the country the request is made from (DE - Germany)
tool = SerplyWebpageToMarkdownTool(proxy_location="DE")
```

## Combining Multiple Tools

The following example demonstrates how to perform a Google search to find relevant articles and then convert those articles to markdown for easier extraction of key points; a sketch of wiring the resulting agent into a task and crew follows the example.

```python
from crewai import Agent
from crewai_tools import SerplyWebSearchTool, SerplyWebpageToMarkdownTool

search_tool = SerplyWebSearchTool()
convert_to_markdown = SerplyWebpageToMarkdownTool()

# Creating a senior researcher agent with memory and verbose mode
researcher = Agent(
    role='Senior Researcher',
    goal='Uncover groundbreaking technologies in {topic}',
    verbose=True,
    memory=True,
    backstory=(
        "Driven by curiosity, you're at the forefront of "
        "innovation, eager to explore and share knowledge that could change "
        "the world."
    ),
    tools=[search_tool, convert_to_markdown],
    allow_delegation=True
)
```

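Continuing from the `researcher` agent above, the agent would typically be attached to a task and a crew before it runs. A brief sketch of that wiring, assuming the standard `crewai` `Task` and `Crew` APIs; the topic and expected output are illustrative:

```python
from crewai import Task, Crew

research_task = Task(
    description="Find recent articles about {topic} and summarize the key points.",
    expected_output="A bullet-point summary with links to the source articles.",
    agent=researcher,
)

crew = Crew(agents=[researcher], tasks=[research_task])
result = crew.kickoff(inputs={"topic": "open source LLM tooling"})
print(result)
```
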
## Steps to Get Started
To effectively use the Serply API tools, follow these steps:

1. **Package Installation**: Confirm that the `crewai[tools]` package is installed in your Python environment.
2. **API Key Acquisition**: Acquire a Serply API key by registering for a free account at [Serply.io](https://serply.io).
3. **Environment Configuration**: Store your obtained API key in an environment variable named `SERPLY_API_KEY` to facilitate its use by the tools, as sketched below.

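For step 3, the key is read from the environment when a tool is constructed, so it must be set beforehand. A minimal sketch; the key value is a placeholder, and exporting it from your shell or a `.env` file is generally preferable to hard-coding it:

```python
import os

# Set before instantiating any Serply tool; shown here only for illustration
os.environ["SERPLY_API_KEY"] = "<your-serply-api-key>"
```
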
## Conclusion
By integrating the Serply API tools into Python projects, users gain the ability to conduct real-time web, news, scholar, and job searches across the internet directly from their applications. By adhering to the setup and usage guidelines provided, incorporating these tools into projects is streamlined and straightforward.
@@ -0,0 +1,75 @@
import os
import requests
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.rag.rag_tool import RagTool


class SerplyJobSearchToolSchema(BaseModel):
    """Input for Serply Job Search."""
    search_query: str = Field(..., description="Mandatory search query you want to use to fetch job postings.")


class SerplyJobSearchTool(RagTool):
    name: str = "Job Search"
    description: str = "A tool to perform a job search in the US with a search_query."
    args_schema: Type[BaseModel] = SerplyJobSearchToolSchema
    request_url: str = "https://api.serply.io/v1/job/search/"
    proxy_location: Optional[str] = "US"
    """
    proxy_location: (str): Where to get jobs, specifically for country-specific results.
        - Currently only supports US
    """
    headers: Optional[dict] = {}

    def __init__(
        self,
        **kwargs
    ):
        super().__init__(**kwargs)
        self.headers = {
            "X-API-KEY": os.environ["SERPLY_API_KEY"],
            "User-Agent": "crew-tools",
            "X-Proxy-Location": self.proxy_location
        }

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        query_payload = {}

        # accept either `query` or `search_query` as the search term
        if "query" in kwargs:
            query_payload["q"] = kwargs["query"]
        elif "search_query" in kwargs:
            query_payload["q"] = kwargs["search_query"]

        # build the url
        url = f"{self.request_url}{urlencode(query_payload)}"

        response = requests.request("GET", url, headers=self.headers)

        jobs = response.json().get("jobs", "")

        if not jobs:
            return ""

        string = []
        for job in jobs:
            try:
                string.append('\n'.join([
                    f"Position: {job['position']}",
                    f"Employer: {job['employer']}",
                    f"Location: {job['location']}",
                    f"Link: {job['link']}",
                    f"Highlights: {', '.join(job['highlights'])}",
                    f"Is Remote: {job['is_remote']}",
                    f"Is Hybrid: {job['is_remote']}",
                    "---"
                ]))
            except KeyError:
                continue

        content = '\n'.join(string)
        return f"\nSearch results: {content}\n"
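The request URL is built by appending the URL-encoded query parameters directly to `request_url`. A small illustration of what that produces for a sample query; the query text is illustrative:

```python
from urllib.parse import urlencode

request_url = "https://api.serply.io/v1/job/search/"
query_payload = {"q": "senior python developer"}

# prints: https://api.serply.io/v1/job/search/q=senior+python+developer
print(f"{request_url}{urlencode(query_payload)}")
```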
@@ -0,0 +1,81 @@
import os
import requests
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool


class SerplyNewsSearchToolSchema(BaseModel):
    """Input for Serply News Search."""
    search_query: str = Field(..., description="Mandatory search query you want to use to fetch news articles")


class SerplyNewsSearchTool(BaseTool):
    name: str = "News Search"
    description: str = "A tool to perform a news article search with a search_query."
    args_schema: Type[BaseModel] = SerplyNewsSearchToolSchema
    search_url: str = "https://api.serply.io/v1/news/"
    proxy_location: Optional[str] = "US"
    headers: Optional[dict] = {}
    limit: Optional[int] = 10

    def __init__(
        self,
        limit: Optional[int] = 10,
        proxy_location: Optional[str] = "US",
        **kwargs
    ):
        """
        param: limit (int): The maximum number of results to return [10-100, defaults to 10]
        proxy_location: (str): Where to get news, specifically for country-specific results.
            ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
        """
        super().__init__(**kwargs)
        self.limit = limit
        self.proxy_location = proxy_location
        self.headers = {
            "X-API-KEY": os.environ["SERPLY_API_KEY"],
            "User-Agent": "crew-tools",
            "X-Proxy-Location": proxy_location
        }

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        # build query parameters
        query_payload = {}

        if "query" in kwargs:
            query_payload["q"] = kwargs["query"]
        elif "search_query" in kwargs:
            query_payload["q"] = kwargs["search_query"]

        # build the url
        url = f"{self.search_url}{urlencode(query_payload)}"

        response = requests.request("GET", url, headers=self.headers)
        results = response.json()
        if "entries" in results:
            results = results['entries']
            string = []
            for result in results[:self.limit]:
                try:
                    # follow the redirect to resolve the final article URL
                    r = requests.get(result['link'])
                    final_link = r.history[-1].headers['Location']
                    string.append('\n'.join([
                        f"Title: {result['title']}",
                        f"Link: {final_link}",
                        f"Source: {result['source']['title']}",
                        f"Published: {result['published']}",
                        "---"
                    ]))
                except KeyError:
                    continue

            content = '\n'.join(string)
            return f"\nSearch results: {content}\n"
        else:
            return results
@@ -0,0 +1,86 @@
import os
import requests
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool


class SerplyScholarSearchToolSchema(BaseModel):
    """Input for Serply Scholar Search."""
    search_query: str = Field(..., description="Mandatory search query you want to use to fetch scholarly literature")


class SerplyScholarSearchTool(BaseTool):
    name: str = "Scholar Search"
    description: str = "A tool to perform a scholarly literature search with a search_query."
    args_schema: Type[BaseModel] = SerplyScholarSearchToolSchema
    search_url: str = "https://api.serply.io/v1/scholar/"
    hl: Optional[str] = "us"
    proxy_location: Optional[str] = "US"
    headers: Optional[dict] = {}

    def __init__(
        self,
        hl: str = "us",
        proxy_location: Optional[str] = "US",
        **kwargs
    ):
        """
        param: hl (str): host Language code to display results in
            (reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
        proxy_location: (str): Where to get scholarly literature, specifically for country-specific results.
            ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
        """
        super().__init__(**kwargs)
        self.hl = hl
        self.proxy_location = proxy_location
        self.headers = {
            "X-API-KEY": os.environ["SERPLY_API_KEY"],
            "User-Agent": "crew-tools",
            "X-Proxy-Location": proxy_location
        }

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        query_payload = {
            "hl": self.hl
        }

        if "query" in kwargs:
            query_payload["q"] = kwargs["query"]
        elif "search_query" in kwargs:
            query_payload["q"] = kwargs["search_query"]

        # build the url
        url = f"{self.search_url}{urlencode(query_payload)}"

        response = requests.request("GET", url, headers=self.headers)
        articles = response.json().get("articles", "")

        if not articles:
            return ""

        string = []
        for article in articles:
            try:
                if "doc" in article:
                    link = article['doc']['link']
                else:
                    link = article['link']
                authors = [author['name'] for author in article['author']['authors']]
                string.append('\n'.join([
                    f"Title: {article['title']}",
                    f"Link: {link}",
                    f"Description: {article['description']}",
                    f"Cite: {article['cite']}",
                    f"Authors: {', '.join(authors)}",
                    "---"
                ]))
            except KeyError:
                continue

        content = '\n'.join(string)
        return f"\nSearch results: {content}\n"
@@ -0,0 +1,93 @@
import os
import requests
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool


class SerplyWebSearchToolSchema(BaseModel):
    """Input for Serply Web Search."""
    search_query: str = Field(..., description="Mandatory search query you want to use to search Google")


class SerplyWebSearchTool(BaseTool):
    name: str = "Google Search"
    description: str = "A tool to perform Google search with a search_query."
    args_schema: Type[BaseModel] = SerplyWebSearchToolSchema
    search_url: str = "https://api.serply.io/v1/search/"
    hl: Optional[str] = "us"
    limit: Optional[int] = 10
    device_type: Optional[str] = "desktop"
    proxy_location: Optional[str] = "US"
    query_payload: Optional[dict] = {}
    headers: Optional[dict] = {}

    def __init__(
        self,
        hl: str = "us",
        limit: int = 10,
        device_type: str = "desktop",
        proxy_location: str = "US",
        **kwargs
    ):
        """
        param: query (str): The query to search for
        param: hl (str): host Language code to display results in
            (reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
        param: limit (int): The maximum number of results to return [10-100, defaults to 10]
        param: device_type (str): desktop/mobile results (defaults to desktop)
        proxy_location: (str): Where to perform the search, specifically for local/regional results.
            ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
        """
        super().__init__(**kwargs)

        self.limit = limit
        self.device_type = device_type
        self.proxy_location = proxy_location

        # build query parameters
        self.query_payload = {
            "num": limit,
            "gl": proxy_location.upper(),
            "hl": hl.lower()
        }
        self.headers = {
            "X-API-KEY": os.environ["SERPLY_API_KEY"],
            "X-User-Agent": device_type,
            "User-Agent": "crew-tools",
            "X-Proxy-Location": proxy_location
        }

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        if "query" in kwargs:
            self.query_payload["q"] = kwargs["query"]
        elif "search_query" in kwargs:
            self.query_payload["q"] = kwargs["search_query"]

        # build the url
        url = f"{self.search_url}{urlencode(self.query_payload)}"

        response = requests.request("GET", url, headers=self.headers)
        results = response.json()
        if "results" in results:
            results = results['results']
            string = []
            for result in results:
                try:
                    string.append('\n'.join([
                        f"Title: {result['title']}",
                        f"Link: {result['link']}",
                        f"Description: {result['description'].strip()}",
                        "---"
                    ]))
                except KeyError:
                    continue

            content = '\n'.join(string)
            return f"\nSearch results: {content}\n"
        else:
            return results
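As the constructor shows, `limit`, `proxy_location`, and `hl` map onto the `num`, `gl`, and `hl` query parameters, while `device_type` and `proxy_location` are also sent as request headers. A short sketch of the resulting state; it assumes `SERPLY_API_KEY` is set in the environment, and the argument values are illustrative:

```python
from crewai_tools import SerplyWebSearchTool

tool = SerplyWebSearchTool(limit=25, hl="fr", proxy_location="FR", device_type="mobile")

# {'num': 25, 'gl': 'FR', 'hl': 'fr'}
print(tool.query_payload)

# 'mobile', sent as the X-User-Agent header
print(tool.headers["X-User-Agent"])
```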
@@ -0,0 +1,48 @@
import os
import requests
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.rag.rag_tool import RagTool


class SerplyWebpageToMarkdownToolSchema(BaseModel):
    """Input for Serply Webpage To Markdown."""
    url: str = Field(..., description="Mandatory url you want to use to fetch and convert to markdown")


class SerplyWebpageToMarkdownTool(RagTool):
    name: str = "Webpage to Markdown"
    description: str = "A tool to convert a webpage to markdown to make it easier for LLMs to understand"
    args_schema: Type[BaseModel] = SerplyWebpageToMarkdownToolSchema
    request_url: str = "https://api.serply.io/v1/request"
    proxy_location: Optional[str] = "US"
    headers: Optional[dict] = {}

    def __init__(
        self,
        proxy_location: Optional[str] = "US",
        **kwargs
    ):
        """
        proxy_location: (str): Where to make the request from, specifically for country-specific results.
            ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
        """
        super().__init__(**kwargs)
        self.proxy_location = proxy_location
        self.headers = {
            "X-API-KEY": os.environ["SERPLY_API_KEY"],
            "User-Agent": "crew-tools",
            "X-Proxy-Location": proxy_location
        }

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        # ask the Serply request endpoint to fetch the page and return markdown
        data = {
            "url": kwargs["url"],
            "method": "GET",
            "response_type": "markdown"
        }
        response = requests.request("POST", self.request_url, headers=self.headers, json=data)
        return response.text
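Unlike the search tools, this one takes a `url` rather than a `search_query`. A minimal usage sketch, assuming `run` forwards the keyword argument to `_run`; the URL is illustrative and `SERPLY_API_KEY` is assumed to be set:

```python
from crewai_tools import SerplyWebpageToMarkdownTool

tool = SerplyWebpageToMarkdownTool(proxy_location="US")

# Returns the page content converted to markdown text
markdown = tool.run(url="https://example.com/some-article")
print(markdown)
```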
@@ -57,4 +57,4 @@ class TXTSearchTool(RagTool):
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
        return super()._run(query=search_query, **kwargs)

@@ -57,4 +57,4 @@ class WebsiteSearchTool(RagTool):
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
        return super()._run(query=search_query, **kwargs)

@@ -57,4 +57,4 @@ class XMLSearchTool(RagTool):
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
        return super()._run(query=search_query, **kwargs)

@@ -60,4 +60,4 @@ class YoutubeChannelSearchTool(RagTool):
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
        return super()._run(query=search_query, **kwargs)

@@ -57,4 +57,4 @@ class YoutubeVideoSearchTool(RagTool):
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
        return super()._run(query=search_query, **kwargs)

38
tests/tools/test_code_interpreter_tool.py
Normal file
38
tests/tools/test_code_interpreter_tool.py
Normal file
@@ -0,0 +1,38 @@
import unittest
from unittest.mock import patch

from crewai_tools.tools.code_interpreter_tool.code_interpreter_tool import (
    CodeInterpreterTool,
)


class TestCodeInterpreterTool(unittest.TestCase):
    @patch("crewai_tools.tools.code_interpreter_tool.code_interpreter_tool.docker")
    def test_run_code_in_docker(self, docker_mock):
        tool = CodeInterpreterTool()
        code = "print('Hello, World!')"
        libraries_used = "numpy,pandas"
        expected_output = "Hello, World!\n"

        docker_mock.from_env().containers.run().exec_run().exit_code = 0
        docker_mock.from_env().containers.run().exec_run().output = (
            expected_output.encode()
        )
        result = tool.run_code_in_docker(code, libraries_used)

        self.assertEqual(result, expected_output)

    @patch("crewai_tools.tools.code_interpreter_tool.code_interpreter_tool.docker")
    def test_run_code_in_docker_with_error(self, docker_mock):
        tool = CodeInterpreterTool()
        code = "print(1/0)"
        libraries_used = "numpy,pandas"
        expected_output = "Something went wrong while running the code: \nZeroDivisionError: division by zero\n"

        docker_mock.from_env().containers.run().exec_run().exit_code = 1
        docker_mock.from_env().containers.run().exec_run().output = (
            b"ZeroDivisionError: division by zero\n"
        )
        result = tool.run_code_in_docker(code, libraries_used)

        self.assertEqual(result, expected_output)