Mirror of https://github.com/crewAIInc/crewAI.git, synced 2026-01-09 08:08:32 +00:00

feat: Add Vision tool to the CrewAI tool
@@ -17,23 +17,25 @@ from .tools import (
     LlamaIndexTool,
     MDXSearchTool,
     MultiOnTool,
+    NL2SQLTool,
     PDFSearchTool,
     PGSearchTool,
     RagTool,
     ScrapeElementFromWebsiteTool,
-    ScrapflyScrapeWebsiteTool,
     ScrapeWebsiteTool,
+    ScrapflyScrapeWebsiteTool,
     SeleniumScrapingTool,
     SerperDevTool,
-    SerplyWebSearchTool,
+    SerplyJobSearchTool,
     SerplyNewsSearchTool,
     SerplyScholarSearchTool,
     SerplyWebpageToMarkdownTool,
-    SerplyJobSearchTool,
+    SerplyWebSearchTool,
     TXTSearchTool,
+    VisionTool,
     WebsiteSearchTool,
     XMLSearchTool,
     YoutubeChannelSearchTool,
-    YoutubeVideoSearchTool
+    YoutubeVideoSearchTool,
 )
 from .tools.base_tool import BaseTool, Tool, tool
@@ -8,14 +8,19 @@ from .directory_search_tool.directory_search_tool import DirectorySearchTool
 from .docx_search_tool.docx_search_tool import DOCXSearchTool
 from .exa_tools.exa_search_tool import EXASearchTool
 from .file_read_tool.file_read_tool import FileReadTool
-from .firecrawl_crawl_website_tool.firecrawl_crawl_website_tool import FirecrawlCrawlWebsiteTool
-from .firecrawl_scrape_website_tool.firecrawl_scrape_website_tool import FirecrawlScrapeWebsiteTool
+from .firecrawl_crawl_website_tool.firecrawl_crawl_website_tool import (
+    FirecrawlCrawlWebsiteTool,
+)
+from .firecrawl_scrape_website_tool.firecrawl_scrape_website_tool import (
+    FirecrawlScrapeWebsiteTool,
+)
 from .firecrawl_search_tool.firecrawl_search_tool import FirecrawlSearchTool
 from .github_search_tool.github_search_tool import GithubSearchTool
 from .json_search_tool.json_search_tool import JSONSearchTool
 from .llamaindex_tool.llamaindex_tool import LlamaIndexTool
 from .mdx_seach_tool.mdx_search_tool import MDXSearchTool
 from .multion_tool.multion_tool import MultiOnTool
+from .nl2sql.nl2sql_tool import NL2SQLTool
 from .pdf_search_tool.pdf_search_tool import PDFSearchTool
 from .pg_seach_tool.pg_search_tool import PGSearchTool
 from .rag.rag_tool import RagTool
@@ -23,17 +28,22 @@ from .scrape_element_from_website.scrape_element_from_website import (
     ScrapeElementFromWebsiteTool,
 )
 from .scrape_website_tool.scrape_website_tool import ScrapeWebsiteTool
-from .scrapfly_scrape_website_tool.scrapfly_scrape_website_tool import ScrapflyScrapeWebsiteTool
+from .scrapfly_scrape_website_tool.scrapfly_scrape_website_tool import (
+    ScrapflyScrapeWebsiteTool,
+)
 from .selenium_scraping_tool.selenium_scraping_tool import SeleniumScrapingTool
 from .serper_dev_tool.serper_dev_tool import SerperDevTool
-from .serply_api_tool.serply_web_search_tool import SerplyWebSearchTool
+from .serply_api_tool.serply_job_search_tool import SerplyJobSearchTool
 from .serply_api_tool.serply_news_search_tool import SerplyNewsSearchTool
 from .serply_api_tool.serply_scholar_search_tool import SerplyScholarSearchTool
+from .serply_api_tool.serply_web_search_tool import SerplyWebSearchTool
 from .serply_api_tool.serply_webpage_to_markdown_tool import SerplyWebpageToMarkdownTool
-from .serply_api_tool.serply_job_search_tool import SerplyJobSearchTool
+from .spider_tool.spider_tool import SpiderTool
 from .txt_search_tool.txt_search_tool import TXTSearchTool
+from .vision_tool.vision_tool import VisionTool
 from .website_search.website_search_tool import WebsiteSearchTool
 from .xml_search_tool.xml_search_tool import XMLSearchTool
-from .youtube_channel_search_tool.youtube_channel_search_tool import YoutubeChannelSearchTool
+from .youtube_channel_search_tool.youtube_channel_search_tool import (
+    YoutubeChannelSearchTool,
+)
 from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool
-from .spider_tool.spider_tool import SpiderTool
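Taken together, the two hunks above only widen the package's public import surface: they re-export the new tools and reorder or reflow existing imports. A minimal sketch (not part of the commit) of the top-level imports this enables, assuming a crewai_tools build that includes this change:

```python
# Sketch only: checks that the names re-exported by the updated __init__ modules resolve.
from crewai_tools import NL2SQLTool, VisionTool

vision_tool = VisionTool()
print(vision_tool.name)  # "Vision Tool", as defined in vision_tool.py below
```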
src/crewai_tools/tools/vision_tool/README.md (new file, 30 lines)

@@ -0,0 +1,30 @@
# Vision Tool

## Description

This tool is used to extract text from images. When passed to the agent it will extract the text from the image and then use it to generate a response, report or any other output. The URL or the PATH of the image should be passed to the Agent.

## Installation
Install the crewai_tools package
```shell
pip install 'crewai[tools]'
```

## Usage

In order to use the VisionTool, the OpenAI API key should be set in the environment variable `OPENAI_API_KEY`.

```python
from crewai_tools import VisionTool

vision_tool = VisionTool()

@agent
def researcher(self) -> Agent:
    return Agent(
        config=self.agents_config["researcher"],
        allow_delegation=False,
        tools=[vision_tool]
    )
```
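The committed README stops at wiring the tool into an agent. As a complement, here is a minimal end-to-end sketch (an assumption, not part of the commit) of what "The URL or the PATH of the image should be passed to the Agent" looks like in practice: the image reference is surfaced through the task description. The `Agent`/`Task`/`Crew` field values and the example URL are illustrative only.

```python
# Sketch only (not from the committed README): the image URL is handed to the
# agent via the task description, and the agent invokes VisionTool with it.
from crewai import Agent, Crew, Task
from crewai_tools import VisionTool

vision_tool = VisionTool()

analyst = Agent(
    role="Image analyst",
    goal="Describe and summarize images referenced in tasks",
    backstory="You turn screenshots and photos into concise written findings.",
    tools=[vision_tool],
    allow_delegation=False,
)

report = Task(
    description=(
        "Extract the text from the image at https://example.com/receipt.jpg "  # placeholder URL
        "and summarize what it shows."
    ),
    expected_output="A short written summary of the image contents.",
    agent=analyst,
)

Crew(agents=[analyst], tasks=[report]).kickoff()
```

As with the README example, `OPENAI_API_KEY` must be set in the environment before running this.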
src/crewai_tools/tools/vision_tool/vision_tool.py (new file, 93 lines)

@@ -0,0 +1,93 @@
import base64
from typing import Type

import requests
from crewai_tools.tools.base_tool import BaseTool
from openai import OpenAI
from pydantic.v1 import BaseModel


class ImagePromptSchema(BaseModel):
    """Input for Vision Tool."""

    image_path_url: str = "The image path or URL."


class VisionTool(BaseTool):
    name: str = "Vision Tool"
    description: str = (
        "This tool uses OpenAI's Vision API to describe the contents of an image."
    )
    args_schema: Type[BaseModel] = ImagePromptSchema

    def _run_web_hosted_images(self, client, image_path_url: str) -> str:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "What's in this image?"},
                        {
                            "type": "image_url",
                            "image_url": {"url": image_path_url},
                        },
                    ],
                }
            ],
            max_tokens=300,
        )

        return response.choices[0].message.content

    def _run_local_images(self, client, image_path_url: str) -> str:
        base64_image = self._encode_image(image_path_url)

        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {client.api_key}",
        }

        payload = {
            "model": "gpt-4o-mini",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "What's in this image?"},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{base64_image}"
                            },
                        },
                    ],
                }
            ],
            "max_tokens": 300,
        }

        response = requests.post(
            "https://api.openai.com/v1/chat/completions", headers=headers, json=payload
        )

        return response.json()["choices"][0]["message"]["content"]

    def _run(self, **kwargs) -> str:
        client = OpenAI()

        image_path_url = kwargs.get("image_path_url")

        if not image_path_url:
            return "Image Path or URL is required."

        if "http" in image_path_url:
            image_description = self._run_web_hosted_images(client, image_path_url)
        else:
            image_description = self._run_local_images(client, image_path_url)

        return image_description

    def _encode_image(self, image_path: str):
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")
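For quick manual testing outside of a crew, `_run` can be exercised directly, since it only needs the `image_path_url` keyword shown above. A minimal sketch, assuming `OPENAI_API_KEY` is set; both image references below are placeholders:

```python
# Sketch only: drives VisionTool._run directly to show the two dispatch branches.
from crewai_tools import VisionTool

tool = VisionTool()

# Contains "http", so the URL is passed straight through via the OpenAI client.
print(tool._run(image_path_url="https://example.com/diagram.png"))

# No "http": the file is read, base64-encoded, and posted to
# api.openai.com/v1/chat/completions as a data URL.
print(tool._run(image_path_url="./diagram.png"))
```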