Review notes (text ahead of the first "diff --git" header is skipped by patch tools):
- Row layout, indentation and blank context rows below were reconstructed from
  the hunk line counts and Python syntax; confirm the context-row indentation
  against the target tree before applying (or apply with whitespace tolerance).
- crew.py `_prepare_tools`: `elif agent and agent.allow_delegation` is nested
  under `if agent.allow_delegation`, so its condition is already true whenever
  it is reached; likewise `if agent and agent.multimodal` re-checks `agent`
  after `agent.allow_delegation` has already been dereferenced.
- add_image_tool.py: the tool description reads "it's content" (should be "its").
- tests/crew_test.py ends without a trailing newline.

diff --git a/src/crewai/agent.py b/src/crewai/agent.py
index 26db2c9fc..02f84f23d 100644
--- a/src/crewai/agent.py
+++ b/src/crewai/agent.py
@@ -410,6 +410,10 @@ class Agent(BaseAgent):
         tools = agent_tools.tools()
         return tools
 
+    def get_multimodal_tools(self):
+        from crewai.tools.agent_tools.add_image_tool import AddImageTool
+        return [AddImageTool()]
+
     def get_code_execution_tools(self):
         try:
             from crewai_tools import CodeInterpreterTool
diff --git a/src/crewai/crew.py b/src/crewai/crew.py
index 0c1e92998..624f82c17 100644
--- a/src/crewai/crew.py
+++ b/src/crewai/crew.py
@@ -726,13 +726,11 @@ class Crew(BaseModel):
             # Determine which tools to use - task tools take precedence over agent tools
             tools_for_task = task.tools if task.tools else agent_to_use.tools or []
 
-            # Add delegation tools if agent allows delegation
-            if agent_to_use.allow_delegation:
-                tools_for_task = self._prepare_tools(task, tools_for_task)
-
-            # Add code execution tools if agent allows code execution
-            if agent_to_use.allow_code_execution:
-                tools_for_task += agent_to_use.get_code_execution_tools()
+            tools_for_task = self._prepare_tools(
+                agent_to_use,
+                task,
+                tools_for_task
+            )
 
             self._log_task_start(task, agent_to_use.role)
 
@@ -799,18 +797,24 @@ class Crew(BaseModel):
             return skipped_task_output
         return None
 
-    def _prepare_tools(self, task: Task, tools: List[Tool]):
-        if self.process == Process.hierarchical:
-            if self.manager_agent:
-                tools = self._update_manager_tools(task, tools)
-            else:
-                raise ValueError("Manager agent is required for hierarchical process.")
+    def _prepare_tools(self, agent: BaseAgent, task: Task, tools: List[Tool]):
+        # Add delegation tools if agent allows delegation
+        if agent.allow_delegation:
+            if self.process == Process.hierarchical:
+                if self.manager_agent:
+                    tools = self._update_manager_tools(task, tools)
+                else:
+                    raise ValueError("Manager agent is required for hierarchical process.")
 
-        elif task.agent and task.agent.allow_delegation:
-            tools = self._add_delegation_tools(task, tools)
+            elif agent and agent.allow_delegation:
+                tools = self._add_delegation_tools(task, tools)
 
-        if task.agent and task.agent.multimodal:
-            tools = self._add_multimodal_tools(task, tools)
+        # Add code execution tools if agent allows code execution
+        if agent.allow_code_execution:
+            tools = self._add_code_execution_tools(agent, tools)
+
+        if agent and agent.multimodal:
+            tools = self._add_multimodal_tools(agent, tools)
 
         return tools
 
@@ -819,10 +823,34 @@ class Crew(BaseModel):
             return self.manager_agent
         return task.agent
 
-    def _add_multimodal_tools(self, task: Task, tools: List[Tool]):
-        tools.extend(task.agent.get_multimodal_tools())
+    def _merge_tools(self, existing_tools: List[Tool], new_tools: List[Tool]) -> List[Tool]:
+        """Merge new tools into existing tools list, avoiding duplicates by tool name."""
+        if not new_tools:
+            return existing_tools
+
+        # Create mapping of tool names to new tools
+        new_tool_map = {tool.name: tool for tool in new_tools}
+
+        # Remove any existing tools that will be replaced
+        tools = [tool for tool in existing_tools if tool.name not in new_tool_map]
+
+        # Add all new tools
+        tools.extend(new_tools)
+
         return tools
 
+    def _inject_delegation_tools(self, tools: List[Tool], task_agent: BaseAgent, agents: List[BaseAgent]):
+        delegation_tools = task_agent.get_delegation_tools(agents)
+        return self._merge_tools(tools, delegation_tools)
+
+    def _add_multimodal_tools(self, agent: BaseAgent, tools: List[Tool]):
+        multimodal_tools = agent.get_multimodal_tools()
+        return self._merge_tools(tools, multimodal_tools)
+
+    def _add_code_execution_tools(self, agent: BaseAgent, tools: List[Tool]):
+        code_tools = agent.get_code_execution_tools()
+        return self._merge_tools(tools, code_tools)
+
     def _add_delegation_tools(self, task: Task, tools: List[Tool]):
         agents_for_delegation = [agent for agent in self.agents if agent != task.agent]
         if len(self.agents) > 1 and len(agents_for_delegation) > 0 and task.agent:
@@ -846,19 +874,6 @@ class Crew(BaseModel):
             # self.manager_agent.tools = tools
         return tools
 
-    def _inject_delegation_tools(self, tools: List[Tool], task_agent: BaseAgent, agents: List[BaseAgent]):
-        delegation_tools = task_agent.get_delegation_tools(agents)
-        # Create mapping of tool names to new delegation tools
-        delegation_tool_map = {tool.name: tool for tool in delegation_tools}
-
-        # Remove any existing tools that will be replaced
-        tools = [tool for tool in tools if tool.name not in delegation_tool_map]
-
-        # Add all delegation tools
-        tools.extend(delegation_tools)
-
-        return tools
-
     def _get_context(self, task: Task, task_outputs: List[TaskOutput]):
         context = (
             aggregate_raw_outputs_from_tasks(task.context)
diff --git a/src/crewai/tools/agent_tools/add_image_tool.py b/src/crewai/tools/agent_tools/add_image_tool.py
new file mode 100644
index 000000000..a37432c1e
--- /dev/null
+++ b/src/crewai/tools/agent_tools/add_image_tool.py
@@ -0,0 +1,34 @@
+from pydantic import BaseModel, Field
+from crewai.tools.base_tool import BaseTool
+
+
+class AddImageToolSchema(BaseModel):
+    image_url: str = Field(..., description="The URL or path of the image to add")
+    action: str = Field(..., description="The context or purpose of why this image is being added and how it should be used")
+
+
+class AddImageTool(BaseTool):
+    """Tool for adding images to the content"""
+
+    name: str = "Add image to content"
+    description: str = "See image to understand it's content"
+    args_schema: type[BaseModel] = AddImageToolSchema
+
+    def _run(
+        self,
+        image_url: str,
+        action: str,
+        **kwargs,
+    ) -> dict:
+        return {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": action},
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": image_url,
+                    },
+                },
+            ],
+        }
diff --git a/src/crewai/tools/agent_tools/delegate_work_tool.py b/src/crewai/tools/agent_tools/delegate_work_tool.py
index 9dbf6c920..45f12772d 100644
--- a/src/crewai/tools/agent_tools/delegate_work_tool.py
+++ b/src/crewai/tools/agent_tools/delegate_work_tool.py
@@ -1,7 +1,5 @@
 from typing import Optional
-
 from pydantic import BaseModel, Field
-
 from crewai.tools.agent_tools.base_agent_tools import BaseAgentTool
 
 
diff --git a/tests/crew_test.py b/tests/crew_test.py
index 4847a10ec..44040358b 100644
--- a/tests/crew_test.py
+++ b/tests/crew_test.py
@@ -2952,3 +2952,51 @@ def test_task_tools_preserve_code_execution_tools():
 
         # Verify the total number of tools (TestTool + CodeInterpreter + 2 delegation tools)
         assert len(used_tools) == 4, "Should have TestTool, CodeInterpreter, and 2 delegation tools"
+
+@pytest.mark.vcr(filter_headers=["authorization"])
+def test_multimodal_flag_adds_multimodal_tools():
+    """
+    Test that an agent with multimodal=True automatically has multimodal tools added to the task execution.
+    """
+    from crewai.tools.agent_tools.add_image_tool import AddImageTool
+
+    # Create an agent that supports multimodal
+    multimodal_agent = Agent(
+        role="Multimodal Analyst",
+        goal="Handle multiple media types (text, images, etc.).",
+        backstory="You're an agent specialized in analyzing text, images, and other media.",
+        allow_delegation=False,
+        multimodal=True,  # crucial for adding the multimodal tool
+    )
+
+    # Create a dummy task
+    task = Task(
+        description="Describe what's in this image and generate relevant metadata.",
+        expected_output="An image description plus any relevant metadata.",
+        agent=multimodal_agent,
+    )
+
+    # Define a crew with the multimodal agent
+    crew = Crew(agents=[multimodal_agent], tasks=[task], process=Process.sequential)
+
+    mock_task_output = TaskOutput(
+        description="Mock description",
+        raw="mocked output",
+        agent="mocked agent"
+    )
+
+    # Mock execute_sync to verify the tools passed at runtime
+    with patch.object(Task, "execute_sync", return_value=mock_task_output) as mock_execute_sync:
+        crew.kickoff()
+
+        # Get the tools that were actually used in execution
+        _, kwargs = mock_execute_sync.call_args
+        used_tools = kwargs["tools"]
+
+        # Check that the multimodal tool was added
+        assert any(isinstance(tool, AddImageTool) for tool in used_tools), (
+            "AddImageTool should be present when agent is multimodal"
+        )
+
+        # Verify we have exactly one tool (just the AddImageTool)
+        assert len(used_tools) == 1, "Should only have the AddImageTool"
\ No newline at end of file