supporting image tool

This commit is contained in:
João Moura
2024-12-26 23:24:41 -03:00
parent 93bee87324
commit e61f2f50c9
5 changed files with 634 additions and 13 deletions

View File

@@ -144,9 +144,13 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
formatted_answer
)
# Directly append the result to the messages if the
# tool is "Add image to content" in case of multimodal
# agents
if formatted_answer.tool == "Add image to content":
self.messages.append(tool_result.result)
continue
else:
if self.step_callback:
self.step_callback(tool_result)

View File

@@ -4,31 +4,37 @@ from crewai.tools.base_tool import BaseTool
class AddImageToolSchema(BaseModel):
    """Argument schema for the "Add image to content" tool.

    `action` is declared exactly once, with a default, so callers may omit
    it. An earlier required declaration of the same field was left next to
    this one and has been dropped; only the defaulted form is kept.
    """

    # Where the image lives: a URL or a path, passed as a plain string.
    image_url: str = Field(..., description="The URL or path of the image to add")
    # Prompt that accompanies the image. When omitted, a generic
    # "describe this image" instruction is used.
    action: str = Field(
        default="Please provide a detailed description of this image, including all visual elements, context, and any notable details you can observe.",
        description="Optional context or question about the image",
    )
class AddImageTool(BaseTool):
    """Tool for adding images to the content.

    Builds a user-role chat message that pairs a text prompt with an image
    reference, so the image can be appended to a conversation.
    """

    # NOTE: other code compares against this exact name string
    # ("Add image to content"), so it must not be reworded.
    name: str = "Add image to content"
    description: str = "See image to understand it's content, you can optionally ask a question about the image"
    args_schema: type[BaseModel] = AddImageToolSchema

    def _run(
        self,
        image_url: str,
        # Quoted so the annotation is never evaluated at definition time and
        # therefore stays valid on interpreters without `X | Y` support.
        action: "str | None" = None,
        **kwargs,
    ) -> dict:
        """Return a multimodal user message for the given image.

        Args:
            image_url: URL or path of the image; placed unchanged into the
                `image_url.url` field of the message.
            action: Question or instruction to send with the image. Any
                falsy value (`None` or an empty string) selects the default
                "describe this image" prompt.
            **kwargs: Accepted for call compatibility and ignored.

        Returns:
            A dict with `"role": "user"` and a `"content"` list holding one
            text part followed by one image_url part.
        """
        action = action or "Please provide a detailed description of this image, including all visual elements, context, and any notable details you can observe."

        content = [
            {"type": "text", "text": action},
            {
                "type": "image_url",
                "image_url": {
                    "url": image_url,
                },
            },
        ]

        # Exactly one "content" key: a second key with the same name in the
        # literal would silently replace the first.
        return {
            "role": "user",
            "content": content,
        }

View File

@@ -5,6 +5,7 @@ from difflib import SequenceMatcher
from textwrap import dedent
from typing import Any, List, Union
from crewai.tools.structured_tool import CrewStructuredTool
import crewai.utilities.events as events
from crewai.agents.tools_handler import ToolsHandler
from crewai.task import Task
@@ -103,6 +104,19 @@ class ToolUsage:
if self.agent.verbose:
self._printer.print(content=f"\n\n{error}\n", color="red")
return error
if isinstance(tool, CrewStructuredTool) and tool.name == 'Add image to content':
try:
result = self._use(tool_string=tool_string, tool=tool, calling=calling)
return result
except Exception as e:
error = getattr(e, "message", str(e))
self.task.increment_tools_errors()
if self.agent.verbose:
self._printer.print(content=f"\n\n{error}\n", color="red")
return error
return f"{self._use(tool_string=tool_string, tool=tool, calling=calling)}" # type: ignore # BUG?: "_use" of "ToolUsage" does not return a value (it only ever returns None)
def _use(