Compare commits


1 Commit

Author SHA1 Message Date
Devin AI
9ba1ec03a8 fix: hierarchical process delegation targets other agents instead of self
Fix a bug in _update_manager_tools(): when a task had an assigned agent
in hierarchical mode, the delegation tools allowed delegation only to the
assigned agent itself ([task.agent]), making delegation circular and
ineffective.

Now delegation tools correctly target all OTHER agents in the crew
(excluding the assigned agent), enabling proper task delegation in
hierarchical process mode.

Fixes #4783

Co-Authored-By: João <joao@crewai.com>
2026-03-09 03:24:37 +00:00
4 changed files with 219 additions and 264 deletions
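For context, the fix reduces to the sketch below (simplified stand-ins, not
the actual crewAI classes; delegation_targets is a hypothetical helper):
before the change, the assigned agent was its own only delegation target;
after it, delegation targets every other agent in the crew.

class Agent:
    def __init__(self, role: str) -> None:
        self.role = role

def delegation_targets(agents: list[Agent], assigned: Agent) -> list[Agent]:
    # Buggy behavior: return [assigned] -- self-delegation, circular.
    # Fixed behavior: every crew member except the assigned agent.
    return [agent for agent in agents if agent != assigned]

crew = [Agent("Researcher"), Agent("Senior Writer")]
targets = delegation_targets(crew, crew[0])
assert [a.role for a in targets] == ["Senior Writer"]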

View File

@@ -1480,7 +1480,8 @@ class Crew(FlowTrackable, BaseModel):
) -> list[BaseTool]:
if self.manager_agent:
if task.agent:
tools = self._inject_delegation_tools(tools, task.agent, [task.agent])
other_agents = [agent for agent in self.agents if agent != task.agent]
tools = self._inject_delegation_tools(tools, task.agent, other_agents)
else:
tools = self._inject_delegation_tools(
tools, self.manager_agent, self.agents

View File

@@ -967,43 +967,7 @@ class LLM(BaseLLM):
self._track_token_usage_internal(usage_info)
self._handle_streaming_callbacks(callbacks, usage_info, last_chunk)
# --- 8) Check accumulated_tool_args from streaming deltas
# Streaming responses deliver tool calls via deltas accumulated in
# accumulated_tool_args, not via the final chunk's message. When
# available_functions is None (native tool handling), we must return
# the accumulated tool calls so the caller (e.g., executor) can
# handle them. When available_functions is provided, tool execution
# already happened during the chunk processing loop via
# _handle_streaming_tool_calls.
if accumulated_tool_args and not available_functions:
tool_calls_list: list[ChatCompletionDeltaToolCall] = [
ChatCompletionDeltaToolCall(
index=idx,
function=Function(
name=tool_arg.function.name,
arguments=tool_arg.function.arguments,
),
)
for idx, tool_arg in accumulated_tool_args.items()
if tool_arg.function.name
]
if tool_calls_list:
self._handle_emit_call_events(
response=full_response,
call_type=LLMCallType.LLM_CALL,
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
)
return tool_calls_list
# --- 8b) If there are tool calls from last chunk but no available functions,
# return the tool calls
if tool_calls and not available_functions:
return tool_calls
if not tool_calls and not accumulated_tool_args:
if not tool_calls or not available_functions:
if response_model and self.is_litellm:
instructor_instance = InternalInstructor(
content=full_response,
@@ -1030,11 +994,10 @@ class LLM(BaseLLM):
)
return full_response
# --- 9) Handle tool calls from last chunk if present (execute when available_functions provided)
if tool_calls and available_functions:
tool_result = self._handle_tool_call(tool_calls, available_functions)
if tool_result is not None:
return tool_result
# --- 9) Handle tool calls if present
tool_result = self._handle_tool_call(tool_calls, available_functions)
if tool_result is not None:
return tool_result
# --- 10) Emit completion event and return response
self._handle_emit_call_events(
@@ -1271,17 +1234,8 @@ class LLM(BaseLLM):
# --- 4) Check for tool calls
tool_calls = getattr(response_message, "tool_calls", [])
# --- 5) If there are tool calls but no available functions, return the tool calls
# This allows the caller (e.g., executor) to handle tool execution
# This must be checked before the text response fallback because some LLMs
# (e.g., Anthropic) return both text content and tool calls in the same response.
# The isinstance check ensures we have actual tool call data (list), not
# auto-generated attributes from mocks or unexpected types.
if isinstance(tool_calls, list) and tool_calls and not available_functions:
return tool_calls
# --- 6) If no tool calls or no available functions, return the text response directly as long as there is a text response
if not tool_calls and text_response:
# --- 5) If no tool calls or no available functions, return the text response directly as long as there is a text response
if (not tool_calls or not available_functions) and text_response:
self._handle_emit_call_events(
response=text_response,
call_type=LLMCallType.LLM_CALL,
@@ -1291,8 +1245,13 @@ class LLM(BaseLLM):
)
return text_response
# --- 6) If there are tool calls but no available functions, return the tool calls
# This allows the caller (e.g., executor) to handle tool execution
if tool_calls and not available_functions:
return tool_calls
# --- 7) Handle tool calls if present (execute when available_functions provided)
if isinstance(tool_calls, list) and tool_calls and available_functions:
if tool_calls and available_functions:
tool_result = self._handle_tool_call(
tool_calls, available_functions, from_task, from_agent
)
@@ -1405,16 +1364,7 @@ class LLM(BaseLLM):
tool_calls = getattr(response_message, "tool_calls", [])
# If there are tool calls but no available functions, return the tool calls
# This allows the caller (e.g., executor) to handle tool execution
# This must be checked before the text response fallback because some LLMs
# (e.g., Anthropic) return both text content and tool calls in the same response.
# The isinstance check ensures we have actual tool call data (list), not
# auto-generated attributes from mocks or unexpected types.
if isinstance(tool_calls, list) and tool_calls and not available_functions:
return tool_calls
if not tool_calls and text_response:
if (not tool_calls or not available_functions) and text_response:
self._handle_emit_call_events(
response=text_response,
call_type=LLMCallType.LLM_CALL,
@@ -1424,8 +1374,13 @@ class LLM(BaseLLM):
)
return text_response
# If there are tool calls but no available functions, return the tool calls
# This allows the caller (e.g., executor) to handle tool execution
if tool_calls and not available_functions:
return tool_calls
# Handle tool calls if present (execute when available_functions provided)
if isinstance(tool_calls, list) and tool_calls and available_functions:
if tool_calls and available_functions:
tool_result = self._handle_tool_call(
tool_calls, available_functions, from_task, from_agent
)
@@ -1558,7 +1513,7 @@ class LLM(BaseLLM):
if usage_info:
self._track_token_usage_internal(usage_info)
if accumulated_tool_args:
if accumulated_tool_args and available_functions:
# Convert accumulated tool args to ChatCompletionDeltaToolCall objects
tool_calls_list: list[ChatCompletionDeltaToolCall] = [
ChatCompletionDeltaToolCall(
@@ -1572,14 +1527,7 @@ class LLM(BaseLLM):
if tool_arg.function.name
]
# If there are tool calls but no available functions, return the tool calls
# This allows the caller (e.g., executor) to handle tool execution.
# This must be checked before the text response fallback because some LLMs
# (e.g., Anthropic) return both text content and tool calls in the same response.
if tool_calls_list and not available_functions:
return tool_calls_list
if tool_calls_list and available_functions:
if tool_calls_list:
result = self._handle_streaming_tool_calls(
tool_calls=tool_calls_list,
accumulated_tool_args=accumulated_tool_args,
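
Read together, these hunks settle on a single dispatch order for the
non-streaming handlers, condensed in the sketch below (dispatch and the
dict-shaped tool calls are illustrative stand-ins, not the real LLM method
or litellm types): prefer the text response whenever no functions are
available to execute tool calls, hand raw tool calls back to the caller
only when there is no text, and execute only when functions are provided.
The streaming path mirrors this by processing accumulated tool deltas only
when available_functions is set.

from typing import Any, Callable

def dispatch(
    text_response: str,
    tool_calls: list[dict[str, Any]],
    available_functions: dict[str, Callable[..., Any]] | None,
) -> Any:
    # 1) Prefer text when there are no tool calls, or no functions to
    #    execute them with, as long as some text came back.
    if (not tool_calls or not available_functions) and text_response:
        return text_response
    # 2) Tool calls but nothing to execute them with (and no text):
    #    return the calls so the caller can handle them natively.
    if tool_calls and not available_functions:
        return tool_calls
    # 3) Tool calls plus functions: execute and return the results
    #    (stand-in for _handle_tool_call).
    if tool_calls and available_functions:
        return [available_functions[c["name"]](**c["args"]) for c in tool_calls]
    return text_response

calls = [{"name": "add", "args": {"a": 1, "b": 2}}]
assert dispatch("some text", calls, None) == "some text"        # text wins without functions
assert dispatch("", calls, None) == calls                        # returned to the caller
assert dispatch("", calls, {"add": lambda a, b: a + b}) == [3]   # executed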

View File

@@ -394,7 +394,9 @@ def test_manager_llm_requirement_for_hierarchical_process(researcher, writer):
@pytest.mark.vcr()
def test_manager_agent_delegating_to_assigned_task_agent(researcher, writer):
"""
Test that the manager agent delegates to the assigned task agent.
Test that when a task is assigned to a specific agent in hierarchical mode,
the delegation tools allow delegating to OTHER agents (not the assigned agent itself).
This verifies the fix for issue #4783.
"""
task = Task(
description="Come up with a list of 5 interesting ideas to explore for an article, then write one amazing paragraph highlight for each idea that showcases how good an article about this topic could be. Return the list of ideas with their paragraph and your notes.",
@@ -429,15 +431,15 @@ def test_manager_agent_delegating_to_assigned_task_agent(researcher, writer):
_, kwargs = mock_execute_sync.call_args
tools = kwargs["tools"]
# Verify the delegation tools were passed correctly
# Verify the delegation tools target OTHER agents, not the assigned agent itself
assert len(tools) == 2
assert any(
"Delegate a specific task to one of the following coworkers: Researcher"
"Delegate a specific task to one of the following coworkers: Senior Writer"
in tool.description
for tool in tools
)
assert any(
"Ask a specific question to one of the following coworkers: Researcher"
"Ask a specific question to one of the following coworkers: Senior Writer"
in tool.description
for tool in tools
)
@@ -481,6 +483,7 @@ def test_manager_agent_delegates_with_varied_role_cases():
"""
Test that the manager agent can delegate to agents regardless of case or whitespace variations in role names.
This test verifies the fix for issue #1503 where role matching was too strict.
After fix for #4783, delegation targets should be the OTHER agents (not the assigned agent).
"""
# Create agents with varied case and whitespace in roles
researcher_spaced = Agent(
@@ -530,7 +533,8 @@ def test_manager_agent_delegates_with_varied_role_cases():
# Verify the delegation tools were passed correctly and can handle case/whitespace variations
assert len(tools) == 2
# Check delegation tool descriptions (should work despite case/whitespace differences)
# Check delegation tool descriptions - should target the OTHER agent (SENIOR WRITER),
# not the assigned agent (Researcher)
delegation_tool = tools[0]
question_tool = tools[1]
@@ -538,19 +542,196 @@ def test_manager_agent_delegates_with_varied_role_cases():
"Delegate a specific task to one of the following coworkers:"
in delegation_tool.description
)
assert (
" Researcher " in delegation_tool.description
or "SENIOR WRITER" in delegation_tool.description
)
assert "SENIOR WRITER" in delegation_tool.description
assert (
"Ask a specific question to one of the following coworkers:"
in question_tool.description
)
assert (
" Researcher " in question_tool.description
or "SENIOR WRITER" in question_tool.description
)
assert "SENIOR WRITER" in question_tool.description
def test_hierarchical_delegation_excludes_assigned_agent():
"""
Test that in hierarchical mode, when a task has an assigned agent,
the delegation tools exclude that agent and include all other agents.
This is a direct unit test for the fix of issue #4783.
"""
agent1 = Agent(
role="Agent One",
goal="Do task one",
backstory="First agent",
allow_delegation=False,
)
agent2 = Agent(
role="Agent Two",
goal="Do task two",
backstory="Second agent",
allow_delegation=False,
)
agent3 = Agent(
role="Agent Three",
goal="Do task three",
backstory="Third agent",
allow_delegation=False,
)
task = Task(
description="A task assigned to agent1",
expected_output="Some output",
agent=agent1,
)
crew = Crew(
agents=[agent1, agent2, agent3],
process=Process.hierarchical,
manager_llm="gpt-4o",
tasks=[task],
)
mock_task_output = TaskOutput(
description="Mock description", raw="mocked output", agent="mocked agent", messages=[]
)
task.output = mock_task_output
with patch.object(
Task, "execute_sync", return_value=mock_task_output
) as mock_execute_sync:
crew.kickoff()
mock_execute_sync.assert_called_once()
_, kwargs = mock_execute_sync.call_args
tools = kwargs["tools"]
assert len(tools) == 2
# Delegation tools should mention Agent Two and Agent Three, but NOT Agent One
delegation_descriptions = " ".join(tool.description for tool in tools)
assert "Agent Two" in delegation_descriptions
assert "Agent Three" in delegation_descriptions
assert "Agent One" not in delegation_descriptions
def test_hierarchical_delegation_with_multiple_tasks():
"""
Test that each task in hierarchical mode gets delegation tools
targeting the correct set of other agents (excluding the assigned agent).
"""
analyst = Agent(
role="Data Analyst",
goal="Analyze data",
backstory="Expert data analyst",
allow_delegation=False,
)
writer = Agent(
role="Report Writer",
goal="Write reports",
backstory="Expert report writer",
allow_delegation=False,
)
task1 = Task(
description="Analyze the data",
expected_output="Analysis results",
agent=analyst,
)
task2 = Task(
description="Write the report",
expected_output="Final report",
agent=writer,
)
crew = Crew(
agents=[analyst, writer],
process=Process.hierarchical,
manager_llm="gpt-4o",
tasks=[task1, task2],
)
mock_task_output = TaskOutput(
description="Mock description", raw="mocked output", agent="mocked agent", messages=[]
)
task1.output = mock_task_output
task2.output = mock_task_output
call_args_list = []
def capture_execute_sync(**kwargs):
call_args_list.append(kwargs)
return mock_task_output
with patch.object(
Task, "execute_sync", side_effect=capture_execute_sync
):
crew.kickoff()
assert len(call_args_list) == 2
# First task (assigned to analyst): delegation should target writer only
tools_task1 = call_args_list[0]["tools"]
desc1 = " ".join(tool.description for tool in tools_task1)
assert "Report Writer" in desc1
assert "Data Analyst" not in desc1
# Second task (assigned to writer): delegation should target analyst only
tools_task2 = call_args_list[1]["tools"]
desc2 = " ".join(tool.description for tool in tools_task2)
assert "Data Analyst" in desc2
assert "Report Writer" not in desc2
def test_hierarchical_delegation_no_assigned_agent_delegates_to_all():
"""
Test that when no agent is assigned to a task in hierarchical mode,
the manager can delegate to ALL agents.
"""
agent1 = Agent(
role="Agent Alpha",
goal="Do alpha work",
backstory="Alpha agent",
allow_delegation=False,
)
agent2 = Agent(
role="Agent Beta",
goal="Do beta work",
backstory="Beta agent",
allow_delegation=False,
)
task = Task(
description="A task with no assigned agent",
expected_output="Some output",
)
crew = Crew(
agents=[agent1, agent2],
process=Process.hierarchical,
manager_llm="gpt-4o",
tasks=[task],
)
mock_task_output = TaskOutput(
description="Mock description", raw="mocked output", agent="mocked agent", messages=[]
)
task.output = mock_task_output
with patch.object(
Task, "execute_sync", return_value=mock_task_output
) as mock_execute_sync:
crew.kickoff()
mock_execute_sync.assert_called_once()
_, kwargs = mock_execute_sync.call_args
tools = kwargs["tools"]
assert len(tools) == 2
# Both agents should be available for delegation
delegation_descriptions = " ".join(tool.description for tool in tools)
assert "Agent Alpha" in delegation_descriptions
assert "Agent Beta" in delegation_descriptions
@pytest.mark.vcr()

View File

@@ -614,11 +614,6 @@ def test_handle_streaming_tool_calls_with_error(get_weather_tool_schema, mock_em
def test_handle_streaming_tool_calls_no_available_functions(
get_weather_tool_schema, mock_emit
):
"""When tools are provided but available_functions is not (defaults to None),
the streaming handler should return the accumulated tool calls so the caller
(e.g., CrewAgentExecutor) can handle them. This is the fix for issue #4788
where tool calls were previously discarded and an empty string was returned.
"""
llm = LLM(model="openai/gpt-4o", stream=True, is_litellm=True)
response = llm.call(
messages=[
@@ -626,14 +621,7 @@ def test_handle_streaming_tool_calls_no_available_functions(
],
tools=[get_weather_tool_schema],
)
# With the fix for #4788, tool calls should be returned as a list
# instead of being discarded (previously returned "")
assert isinstance(response, list), (
f"Expected list of tool calls but got {type(response)}: {response}"
)
assert len(response) == 1
assert response[0].function.name == "get_weather"
assert response[0].function.arguments == '{"location":"New York, NY"}'
assert response == ""
assert_event_count(
mock_emit=mock_emit,
@@ -1034,166 +1022,3 @@ async def test_usage_info_streaming_with_acall():
assert llm._token_usage["total_tokens"] > 0
assert len(result) > 0
def test_non_streaming_tool_calls_returned_when_no_available_functions():
"""Test that tool calls are returned (not text) when available_functions is None.
This reproduces the bug from issue #4788 where LLMs like Anthropic return both
text content AND tool calls in the same response. When available_functions=None
(as used by the executor for native tool handling), tool calls should be returned
instead of the text content.
"""
from litellm.types.utils import ChatCompletionMessageToolCall, Function
llm = LLM(model="gpt-4o-mini", is_litellm=True)
# Mock a response that has BOTH text content AND tool calls
mock_tool_call = ChatCompletionMessageToolCall(
id="call_123",
type="function",
function=Function(
name="code_search",
arguments='{"query": "test query"}',
),
)
mock_message = MagicMock()
mock_message.content = "I will search for the given query."
mock_message.tool_calls = [mock_tool_call]
mock_choice = MagicMock()
mock_choice.message = mock_message
mock_response = MagicMock()
mock_response.choices = [mock_choice]
mock_response.usage = MagicMock()
mock_response.usage.prompt_tokens = 10
mock_response.usage.completion_tokens = 5
mock_response.usage.total_tokens = 15
with patch("litellm.completion", return_value=mock_response):
# Call WITHOUT available_functions (as the executor does for native tool handling)
result = llm.call(
messages=[{"role": "user", "content": "Search for something"}],
tools=[{"type": "function", "function": {"name": "code_search"}}],
available_functions=None,
)
# Result should be the tool calls list, NOT the text response
assert isinstance(result, list), (
f"Expected list of tool calls but got {type(result)}: {result}"
)
assert len(result) == 1
assert result[0].function.name == "code_search"
def test_non_streaming_text_returned_when_no_tool_calls():
"""Test that text response is still returned when there are no tool calls."""
llm = LLM(model="gpt-4o-mini", is_litellm=True)
mock_message = MagicMock()
mock_message.content = "The capital of France is Paris."
mock_message.tool_calls = None
mock_choice = MagicMock()
mock_choice.message = mock_message
mock_response = MagicMock()
mock_response.choices = [mock_choice]
mock_response.usage = MagicMock()
mock_response.usage.prompt_tokens = 10
mock_response.usage.completion_tokens = 5
mock_response.usage.total_tokens = 15
with patch("litellm.completion", return_value=mock_response):
result = llm.call(
messages=[{"role": "user", "content": "What is the capital of France?"}],
)
assert isinstance(result, str)
assert result == "The capital of France is Paris."
@pytest.mark.asyncio
async def test_async_non_streaming_tool_calls_returned_when_no_available_functions():
"""Test async path: tool calls are returned (not text) when available_functions is None.
Same bug as #4788 but for the async non-streaming handler.
"""
from litellm.types.utils import ChatCompletionMessageToolCall, Function
llm = LLM(model="gpt-4o-mini", is_litellm=True, stream=False)
mock_tool_call = ChatCompletionMessageToolCall(
id="call_456",
type="function",
function=Function(
name="web_search",
arguments='{"query": "test"}',
),
)
mock_message = MagicMock()
mock_message.content = "I will search the web."
mock_message.tool_calls = [mock_tool_call]
mock_choice = MagicMock()
mock_choice.message = mock_message
mock_response = MagicMock()
mock_response.choices = [mock_choice]
mock_response.usage = MagicMock()
mock_response.usage.prompt_tokens = 10
mock_response.usage.completion_tokens = 5
mock_response.usage.total_tokens = 15
with patch("litellm.acompletion", return_value=mock_response):
result = await llm.acall(
messages=[{"role": "user", "content": "Search for something"}],
tools=[{"type": "function", "function": {"name": "web_search"}}],
available_functions=None,
)
assert isinstance(result, list), (
f"Expected list of tool calls but got {type(result)}: {result}"
)
assert len(result) == 1
assert result[0].function.name == "web_search"
def test_non_streaming_tool_calls_executed_when_available_functions_provided():
"""Test that tool calls are still executed when available_functions IS provided.
This ensures the fix doesn't break the normal tool execution path.
"""
llm = LLM(model="gpt-4o-mini", is_litellm=True)
mock_tool_call = MagicMock()
mock_tool_call.function.name = "get_weather"
mock_tool_call.function.arguments = '{"location": "New York"}'
mock_message = MagicMock()
mock_message.content = "I will check the weather."
mock_message.tool_calls = [mock_tool_call]
mock_choice = MagicMock()
mock_choice.message = mock_message
mock_response = MagicMock()
mock_response.choices = [mock_choice]
mock_response.usage = MagicMock()
mock_response.usage.prompt_tokens = 10
mock_response.usage.completion_tokens = 5
mock_response.usage.total_tokens = 15
def get_weather(location: str) -> str:
return f"Sunny in {location}"
with patch("litellm.completion", return_value=mock_response):
result = llm.call(
messages=[{"role": "user", "content": "What's the weather?"}],
tools=[{"type": "function", "function": {"name": "get_weather"}}],
available_functions={"get_weather": get_weather},
)
# When available_functions is provided, the tool should be executed
assert result == "Sunny in New York"