Refactor tool preparation and add initial image-adding logic

This commit is contained in:
João Moura
2024-12-26 13:30:59 -03:00
parent e6be4ed66d
commit 93bee87324
5 changed files with 133 additions and 34 deletions

View File

@@ -410,6 +410,10 @@ class Agent(BaseAgent):
tools = agent_tools.tools()
return tools
def get_multimodal_tools(self):
    """Return a one-element list holding a fresh ``AddImageTool``.

    ``AddImageTool`` is imported when the method is called, not at module load.
    """
    from crewai.tools.agent_tools.add_image_tool import AddImageTool

    image_tool = AddImageTool()
    return [image_tool]
def get_code_execution_tools(self):
try:
from crewai_tools import CodeInterpreterTool

View File

@@ -726,13 +726,11 @@ class Crew(BaseModel):
# Determine which tools to use - task tools take precedence over agent tools
tools_for_task = task.tools if task.tools else agent_to_use.tools or []
# Add delegation tools if agent allows delegation
if agent_to_use.allow_delegation:
tools_for_task = self._prepare_tools(task, tools_for_task)
# Add code execution tools if agent allows code execution
if agent_to_use.allow_code_execution:
tools_for_task += agent_to_use.get_code_execution_tools()
tools_for_task = self._prepare_tools(
agent_to_use,
task,
tools_for_task
)
self._log_task_start(task, agent_to_use.role)
@@ -799,18 +797,24 @@ class Crew(BaseModel):
return skipped_task_output
return None
# NOTE(review): two definitions of `_prepare_tools` appear back to back here and the
# indentation is flat; this looks like a before/after rendering of one method — confirm
# against the real file before relying on line order.
# First form: takes only (task, tools). For a hierarchical process it calls
# `_update_manager_tools` when a manager agent exists and raises ValueError otherwise.
def _prepare_tools(self, task: Task, tools: List[Tool]):
if self.process == Process.hierarchical:
if self.manager_agent:
tools = self._update_manager_tools(task, tools)
else:
raise ValueError("Manager agent is required for hierarchical process.")
# Second form: also receives the agent, and returns the (possibly replaced) tools list
# after the delegation, code-execution and multimodal steps below.
def _prepare_tools(self, agent: BaseAgent, task: Task, tools: List[Tool]):
# Add delegation tools if agent allows delegation
if agent.allow_delegation:
if self.process == Process.hierarchical:
if self.manager_agent:
tools = self._update_manager_tools(task, tools)
else:
raise ValueError("Manager agent is required for hierarchical process.")
# NOTE(review): the next branch tests `task.agent` rather than `agent`; presumably it is
# the pre-change form of the branch that follows it — confirm.
elif task.agent and task.agent.allow_delegation:
tools = self._add_delegation_tools(task, tools)
# NOTE(review): `agent.allow_delegation` has already been read above, so the `agent and`
# guard here cannot be what protects against a missing agent.
elif agent and agent.allow_delegation:
tools = self._add_delegation_tools(task, tools)
# NOTE(review): this call passes `task` to `_add_multimodal_tools`, while the call at the
# end of the method passes `agent`; presumably this is the pre-change call — confirm.
if task.agent and task.agent.multimodal:
tools = self._add_multimodal_tools(task, tools)
# Add code execution tools if agent allows code execution
if agent.allow_code_execution:
tools = self._add_code_execution_tools(agent, tools)
# Multimodal tools are added last, only when the agent's `multimodal` flag is truthy.
if agent and agent.multimodal:
tools = self._add_multimodal_tools(agent, tools)
return tools
@@ -819,10 +823,34 @@ class Crew(BaseModel):
return self.manager_agent
return task.agent
def _add_multimodal_tools(self, task: Task, tools: List[Tool]):
tools.extend(task.agent.get_multimodal_tools())
def _merge_tools(self, existing_tools: List[Tool], new_tools: List[Tool]) -> List[Tool]:
"""Merge new tools into existing tools list, avoiding duplicates by tool name."""
if not new_tools:
return existing_tools
# Create mapping of tool names to new tools
new_tool_map = {tool.name: tool for tool in new_tools}
# Remove any existing tools that will be replaced
tools = [tool for tool in existing_tools if tool.name not in new_tool_map]
# Add all new tools
tools.extend(new_tools)
return tools
def _inject_delegation_tools(self, tools: List[Tool], task_agent: BaseAgent, agents: List[BaseAgent]):
    """Merge the delegation tools that ``task_agent`` builds for ``agents`` into ``tools``.

    The combination itself is handled by ``_merge_tools``.
    """
    return self._merge_tools(tools, task_agent.get_delegation_tools(agents))
def _add_multimodal_tools(self, agent: BaseAgent, tools: List[Tool]):
    """Return ``tools`` merged with whatever ``agent.get_multimodal_tools()`` provides."""
    extra = agent.get_multimodal_tools()
    merged = self._merge_tools(tools, extra)
    return merged
def _add_code_execution_tools(self, agent: BaseAgent, tools: List[Tool]):
    """Return ``tools`` merged with the agent's code execution tools.

    The tools come from ``agent.get_code_execution_tools()`` and are combined
    through ``_merge_tools``.
    """
    return self._merge_tools(tools, agent.get_code_execution_tools())
def _add_delegation_tools(self, task: Task, tools: List[Tool]):
agents_for_delegation = [agent for agent in self.agents if agent != task.agent]
if len(self.agents) > 1 and len(agents_for_delegation) > 0 and task.agent:
@@ -846,19 +874,6 @@ class Crew(BaseModel):
# self.manager_agent.tools = tools
return tools
def _inject_delegation_tools(self, tools: List[Tool], task_agent: BaseAgent, agents: List[BaseAgent]):
    """Return a new list combining ``tools`` with ``task_agent``'s delegation tools.

    Any entry of ``tools`` whose name matches a delegation tool is dropped, then
    all delegation tools are placed after the entries that remain.
    """
    fresh = task_agent.get_delegation_tools(agents)

    # Names that the delegation tools take over
    taken_names = {tool.name for tool in fresh}

    # Survivors first, delegation tools after them
    survivors = [tool for tool in tools if tool.name not in taken_names]
    return [*survivors, *fresh]
def _get_context(self, task: Task, task_outputs: List[TaskOutput]):
context = (
aggregate_raw_outputs_from_tasks(task.context)

View File

@@ -0,0 +1,34 @@
from pydantic import BaseModel, Field
from crewai.tools.base_tool import BaseTool
class AddImageToolSchema(BaseModel):
    # Argument model for the add-image tool. Both fields are required (`...` default).
    # Documented with comments, not a docstring, so the model's schema is unchanged.

    # Where the image lives
    image_url: str = Field(
        ...,
        description="The URL or path of the image to add",
    )
    # Why the image is being supplied
    action: str = Field(
        ...,
        description="The context or purpose of why this image is being added and how it should be used",
    )
class AddImageTool(BaseTool):
    """Tool for adding images to the content"""

    name: str = "Add image to content"
    description: str = "See image to understand it's content"
    args_schema: type[BaseModel] = AddImageToolSchema

    def _run(
        self,
        image_url: str,
        action: str,
        **kwargs,
    ) -> dict:
        # Builds a user-role message: the stated purpose as a text part, followed by
        # the image reference. Extra keyword arguments are accepted and ignored.
        text_part = {"type": "text", "text": action}
        image_part = {"type": "image_url", "image_url": {"url": image_url}}
        return {"role": "user", "content": [text_part, image_part]}

View File

@@ -1,7 +1,5 @@
from typing import Optional
from pydantic import BaseModel, Field
from crewai.tools.agent_tools.base_agent_tools import BaseAgentTool

View File

@@ -2952,3 +2952,51 @@ def test_task_tools_preserve_code_execution_tools():
# Verify the total number of tools (TestTool + CodeInterpreter + 2 delegation tools)
assert len(used_tools) == 4, "Should have TestTool, CodeInterpreter, and 2 delegation tools"
@pytest.mark.vcr(filter_headers=["authorization"])
def test_multimodal_flag_adds_multimodal_tools():
    """
    An agent created with multimodal=True should have AddImageTool among the tools
    passed to task execution, and nothing else when no other tools are configured.
    """
    from crewai.tools.agent_tools.add_image_tool import AddImageTool

    # Neither the agent nor the task declares tools, and delegation is off, so the
    # multimodal flag is the only possible source of a tool.
    analyst = Agent(
        role="Multimodal Analyst",
        goal="Handle multiple media types (text, images, etc.).",
        backstory="You're an agent specialized in analyzing text, images, and other media.",
        allow_delegation=False,
        multimodal=True,
    )
    image_task = Task(
        description="Describe what's in this image and generate relevant metadata.",
        expected_output="An image description plus any relevant metadata.",
        agent=analyst,
    )
    crew = Crew(agents=[analyst], tasks=[image_task], process=Process.sequential)

    stub_output = TaskOutput(
        description="Mock description",
        raw="mocked output",
        agent="mocked agent",
    )

    # Patch Task.execute_sync so the tools handed over at runtime can be inspected
    with patch.object(Task, "execute_sync", return_value=stub_output) as execute_spy:
        crew.kickoff()

        # Keyword arguments of the recorded call carry the tools
        used_tools = execute_spy.call_args[1]["tools"]

        has_image_tool = any(isinstance(tool, AddImageTool) for tool in used_tools)
        assert has_image_tool, (
            "AddImageTool should be present when agent is multimodal"
        )

        # The AddImageTool must be the only tool
        assert len(used_tools) == 1, "Should only have the AddImageTool"