Compare commits


1 Commit

Author SHA1 Message Date
Devin AI
9ba1ec03a8 fix: hierarchical process delegation targets other agents instead of self
Fix a bug in _update_manager_tools(): when a task had an assigned agent
in hierarchical mode, the delegation tools allowed delegation only to the
assigned agent itself ([task.agent]), making delegation circular and
ineffective.

Now delegation tools correctly target all OTHER agents in the crew
(excluding the assigned agent), enabling proper task delegation in
hierarchical process mode.

Fixes #4783

Co-Authored-By: João <joao@crewai.com>
2026-03-09 03:24:37 +00:00
4 changed files with 219 additions and 264 deletions
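For context, the fix reduces to the sketch below (simplified stand-ins, not
the actual crewAI classes; delegation_targets is a hypothetical helper):
before the change, the assigned agent was its own only delegation target;
after it, delegation targets every other agent in the crew.

class Agent:
    def __init__(self, role: str) -> None:
        self.role = role

def delegation_targets(agents: list[Agent], assigned: Agent) -> list[Agent]:
    # Buggy behavior: return [assigned] -- self-delegation, circular.
    # Fixed behavior: every crew member except the assigned agent.
    return [agent for agent in agents if agent != assigned]

crew = [Agent("Researcher"), Agent("Senior Writer")]
targets = delegation_targets(crew, crew[0])
assert [a.role for a in targets] == ["Senior Writer"]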

View File

@@ -1480,7 +1480,8 @@ class Crew(FlowTrackable, BaseModel):
) -> list[BaseTool]:
if self.manager_agent:
if task.agent:
tools = self._inject_delegation_tools(tools, task.agent, [task.agent])
other_agents = [agent for agent in self.agents if agent != task.agent]
tools = self._inject_delegation_tools(tools, task.agent, other_agents)
else:
tools = self._inject_delegation_tools(
tools, self.manager_agent, self.agents

View File

@@ -967,43 +967,7 @@ class LLM(BaseLLM):
self._track_token_usage_internal(usage_info)
self._handle_streaming_callbacks(callbacks, usage_info, last_chunk)
# --- 8) Check accumulated_tool_args from streaming deltas
# Streaming responses deliver tool calls via deltas accumulated in
# accumulated_tool_args, not via the final chunk's message. When
# available_functions is None (native tool handling), we must return
# the accumulated tool calls so the caller (e.g., executor) can
# handle them. When available_functions is provided, tool execution
# already happened during the chunk processing loop via
# _handle_streaming_tool_calls.
if accumulated_tool_args and not available_functions:
tool_calls_list: list[ChatCompletionDeltaToolCall] = [
ChatCompletionDeltaToolCall(
index=idx,
function=Function(
name=tool_arg.function.name,
arguments=tool_arg.function.arguments,
),
)
for idx, tool_arg in accumulated_tool_args.items()
if tool_arg.function.name
]
if tool_calls_list:
self._handle_emit_call_events(
response=full_response,
call_type=LLMCallType.LLM_CALL,
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
)
return tool_calls_list
# --- 8b) If there are tool calls from last chunk but no available functions,
# return the tool calls
if tool_calls and not available_functions:
return tool_calls
if not tool_calls and not accumulated_tool_args:
if not tool_calls or not available_functions:
if response_model and self.is_litellm:
instructor_instance = InternalInstructor(
content=full_response,
@@ -1030,11 +994,10 @@ class LLM(BaseLLM):
)
return full_response
# --- 9) Handle tool calls from last chunk if present (execute when available_functions provided)
if tool_calls and available_functions:
tool_result = self._handle_tool_call(tool_calls, available_functions)
if tool_result is not None:
return tool_result
# --- 9) Handle tool calls if present
tool_result = self._handle_tool_call(tool_calls, available_functions)
if tool_result is not None:
return tool_result
# --- 10) Emit completion event and return response
self._handle_emit_call_events(
@@ -1271,17 +1234,8 @@ class LLM(BaseLLM):
# --- 4) Check for tool calls
tool_calls = getattr(response_message, "tool_calls", [])
# --- 5) If there are tool calls but no available functions, return the tool calls
# This allows the caller (e.g., executor) to handle tool execution
# This must be checked before the text response fallback because some LLMs
# (e.g., Anthropic) return both text content and tool calls in the same response.
# The isinstance check ensures we have actual tool call data (list), not
# auto-generated attributes from mocks or unexpected types.
if isinstance(tool_calls, list) and tool_calls and not available_functions:
return tool_calls
# --- 6) If no tool calls or no available functions, return the text response directly as long as there is a text response
if not tool_calls and text_response:
# --- 5) If no tool calls or no available functions, return the text response directly as long as there is a text response
if (not tool_calls or not available_functions) and text_response:
self._handle_emit_call_events(
response=text_response,
call_type=LLMCallType.LLM_CALL,
@@ -1291,8 +1245,13 @@ class LLM(BaseLLM):
)
return text_response
# --- 6) If there are tool calls but no available functions, return the tool calls
# This allows the caller (e.g., executor) to handle tool execution
if tool_calls and not available_functions:
return tool_calls
# --- 7) Handle tool calls if present (execute when available_functions provided)
if isinstance(tool_calls, list) and tool_calls and available_functions:
if tool_calls and available_functions:
tool_result = self._handle_tool_call(
tool_calls, available_functions, from_task, from_agent
)
@@ -1405,16 +1364,7 @@ class LLM(BaseLLM):
tool_calls = getattr(response_message, "tool_calls", [])
# If there are tool calls but no available functions, return the tool calls
# This allows the caller (e.g., executor) to handle tool execution
# This must be checked before the text response fallback because some LLMs
# (e.g., Anthropic) return both text content and tool calls in the same response.
# The isinstance check ensures we have actual tool call data (list), not
# auto-generated attributes from mocks or unexpected types.
if isinstance(tool_calls, list) and tool_calls and not available_functions:
return tool_calls
if not tool_calls and text_response:
if (not tool_calls or not available_functions) and text_response:
self._handle_emit_call_events(
response=text_response,
call_type=LLMCallType.LLM_CALL,
@@ -1424,8 +1374,13 @@ class LLM(BaseLLM):
)
return text_response
# If there are tool calls but no available functions, return the tool calls
# This allows the caller (e.g., executor) to handle tool execution
if tool_calls and not available_functions:
return tool_calls
# Handle tool calls if present (execute when available_functions provided)
if isinstance(tool_calls, list) and tool_calls and available_functions:
if tool_calls and available_functions:
tool_result = self._handle_tool_call(
tool_calls, available_functions, from_task, from_agent
)
@@ -1558,7 +1513,7 @@ class LLM(BaseLLM):
if usage_info:
self._track_token_usage_internal(usage_info)
if accumulated_tool_args:
if accumulated_tool_args and available_functions:
# Convert accumulated tool args to ChatCompletionDeltaToolCall objects
tool_calls_list: list[ChatCompletionDeltaToolCall] = [
ChatCompletionDeltaToolCall(
@@ -1572,14 +1527,7 @@ class LLM(BaseLLM):
if tool_arg.function.name
]
# If there are tool calls but no available functions, return the tool calls
# This allows the caller (e.g., executor) to handle tool execution.
# This must be checked before the text response fallback because some LLMs
# (e.g., Anthropic) return both text content and tool calls in the same response.
if tool_calls_list and not available_functions:
return tool_calls_list
if tool_calls_list and available_functions:
if tool_calls_list:
result = self._handle_streaming_tool_calls(
tool_calls=tool_calls_list,
accumulated_tool_args=accumulated_tool_args,
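
Read together, these hunks settle on a single dispatch order for the
non-streaming handlers, condensed in the sketch below (dispatch and the
dict-shaped tool calls are illustrative stand-ins, not the real LLM method
or litellm types): prefer the text response whenever no functions are
available to execute tool calls, hand raw tool calls back to the caller
only when there is no text, and execute only when functions are provided.
The streaming path mirrors this by processing accumulated tool deltas only
when available_functions is set.

from typing import Any, Callable

def dispatch(
    text_response: str,
    tool_calls: list[dict[str, Any]],
    available_functions: dict[str, Callable[..., Any]] | None,
) -> Any:
    # 1) Prefer text when there are no tool calls, or no functions to
    #    execute them with, as long as some text came back.
    if (not tool_calls or not available_functions) and text_response:
        return text_response
    # 2) Tool calls but nothing to execute them with (and no text):
    #    return the calls so the caller can handle them natively.
    if tool_calls and not available_functions:
        return tool_calls
    # 3) Tool calls plus functions: execute and return the results
    #    (stand-in for _handle_tool_call).
    if tool_calls and available_functions:
        return [available_functions[c["name"]](**c["args"]) for c in tool_calls]
    return text_response

calls = [{"name": "add", "args": {"a": 1, "b": 2}}]
assert dispatch("some text", calls, None) == "some text"        # text wins without functions
assert dispatch("", calls, None) == calls                        # returned to the caller
assert dispatch("", calls, {"add": lambda a, b: a + b}) == [3]   # executed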

View File

@@ -394,7 +394,9 @@ def test_manager_llm_requirement_for_hierarchical_process(researcher, writer):
@pytest.mark.vcr()
def test_manager_agent_delegating_to_assigned_task_agent(researcher, writer):
"""
Test that the manager agent delegates to the assigned task agent.
Test that when a task is assigned to a specific agent in hierarchical mode,
the delegation tools allow delegating to OTHER agents (not the assigned agent itself).
This verifies the fix for issue #4783.
"""
task = Task(
description="Come up with a list of 5 interesting ideas to explore for an article, then write one amazing paragraph highlight for each idea that showcases how good an article about this topic could be. Return the list of ideas with their paragraph and your notes.",
@@ -429,15 +431,15 @@ def test_manager_agent_delegating_to_assigned_task_agent(researcher, writer):
_, kwargs = mock_execute_sync.call_args
tools = kwargs["tools"]
# Verify the delegation tools were passed correctly
# Verify the delegation tools target OTHER agents, not the assigned agent itself
assert len(tools) == 2
assert any(
"Delegate a specific task to one of the following coworkers: Researcher"
"Delegate a specific task to one of the following coworkers: Senior Writer"
in tool.description
for tool in tools
)
assert any(
"Ask a specific question to one of the following coworkers: Researcher"
"Ask a specific question to one of the following coworkers: Senior Writer"
in tool.description
for tool in tools
)
@@ -481,6 +483,7 @@ def test_manager_agent_delegates_with_varied_role_cases():
"""
Test that the manager agent can delegate to agents regardless of case or whitespace variations in role names.
This test verifies the fix for issue #1503 where role matching was too strict.
After fix for #4783, delegation targets should be the OTHER agents (not the assigned agent).
"""
# Create agents with varied case and whitespace in roles
researcher_spaced = Agent(
@@ -530,7 +533,8 @@ def test_manager_agent_delegates_with_varied_role_cases():
# Verify the delegation tools were passed correctly and can handle case/whitespace variations
assert len(tools) == 2
# Check delegation tool descriptions (should work despite case/whitespace differences)
# Check delegation tool descriptions - should target the OTHER agent (SENIOR WRITER),
# not the assigned agent (Researcher)
delegation_tool = tools[0]
question_tool = tools[1]
@@ -538,19 +542,196 @@ def test_manager_agent_delegates_with_varied_role_cases():
"Delegate a specific task to one of the following coworkers:"
in delegation_tool.description
)
assert (
" Researcher " in delegation_tool.description
or "SENIOR WRITER" in delegation_tool.description
)
assert "SENIOR WRITER" in delegation_tool.description
assert (
"Ask a specific question to one of the following coworkers:"
in question_tool.description
)
assert (
" Researcher " in question_tool.description
or "SENIOR WRITER" in question_tool.description
)
assert "SENIOR WRITER" in question_tool.description
def test_hierarchical_delegation_excludes_assigned_agent():
"""
Test that in hierarchical mode, when a task has an assigned agent,
the delegation tools exclude that agent and include all other agents.
This is a direct unit test for the fix of issue #4783.
"""
agent1 = Agent(
role="Agent One",
goal="Do task one",
backstory="First agent",
allow_delegation=False,
)
agent2 = Agent(
role="Agent Two",
goal="Do task two",
backstory="Second agent",
allow_delegation=False,
)
agent3 = Agent(
role="Agent Three",
goal="Do task three",
backstory="Third agent",
allow_delegation=False,
)
task = Task(
description="A task assigned to agent1",
expected_output="Some output",
agent=agent1,
)
crew = Crew(
agents=[agent1, agent2, agent3],
process=Process.hierarchical,
manager_llm="gpt-4o",
tasks=[task],
)
mock_task_output = TaskOutput(
description="Mock description", raw="mocked output", agent="mocked agent", messages=[]
)
task.output = mock_task_output
with patch.object(
Task, "execute_sync", return_value=mock_task_output
) as mock_execute_sync:
crew.kickoff()
mock_execute_sync.assert_called_once()
_, kwargs = mock_execute_sync.call_args
tools = kwargs["tools"]
assert len(tools) == 2
# Delegation tools should mention Agent Two and Agent Three, but NOT Agent One
delegation_descriptions = " ".join(tool.description for tool in tools)
assert "Agent Two" in delegation_descriptions
assert "Agent Three" in delegation_descriptions
assert "Agent One" not in delegation_descriptions
def test_hierarchical_delegation_with_multiple_tasks():
"""
Test that each task in hierarchical mode gets delegation tools
targeting the correct set of other agents (excluding the assigned agent).
"""
analyst = Agent(
role="Data Analyst",
goal="Analyze data",
backstory="Expert data analyst",
allow_delegation=False,
)
writer = Agent(
role="Report Writer",
goal="Write reports",
backstory="Expert report writer",
allow_delegation=False,
)
task1 = Task(
description="Analyze the data",
expected_output="Analysis results",
agent=analyst,
)
task2 = Task(
description="Write the report",
expected_output="Final report",
agent=writer,
)
crew = Crew(
agents=[analyst, writer],
process=Process.hierarchical,
manager_llm="gpt-4o",
tasks=[task1, task2],
)
mock_task_output = TaskOutput(
description="Mock description", raw="mocked output", agent="mocked agent", messages=[]
)
task1.output = mock_task_output
task2.output = mock_task_output
call_args_list = []
def capture_execute_sync(**kwargs):
call_args_list.append(kwargs)
return mock_task_output
with patch.object(
Task, "execute_sync", side_effect=capture_execute_sync
):
crew.kickoff()
assert len(call_args_list) == 2
# First task (assigned to analyst): delegation should target writer only
tools_task1 = call_args_list[0]["tools"]
desc1 = " ".join(tool.description for tool in tools_task1)
assert "Report Writer" in desc1
assert "Data Analyst" not in desc1
# Second task (assigned to writer): delegation should target analyst only
tools_task2 = call_args_list[1]["tools"]
desc2 = " ".join(tool.description for tool in tools_task2)
assert "Data Analyst" in desc2
assert "Report Writer" not in desc2
def test_hierarchical_delegation_no_assigned_agent_delegates_to_all():
"""
Test that when no agent is assigned to a task in hierarchical mode,
the manager can delegate to ALL agents.
"""
agent1 = Agent(
role="Agent Alpha",
goal="Do alpha work",
backstory="Alpha agent",
allow_delegation=False,
)
agent2 = Agent(
role="Agent Beta",
goal="Do beta work",
backstory="Beta agent",
allow_delegation=False,
)
task = Task(
description="A task with no assigned agent",
expected_output="Some output",
)
crew = Crew(
agents=[agent1, agent2],
process=Process.hierarchical,
manager_llm="gpt-4o",
tasks=[task],
)
mock_task_output = TaskOutput(
description="Mock description", raw="mocked output", agent="mocked agent", messages=[]
)
task.output = mock_task_output
with patch.object(
Task, "execute_sync", return_value=mock_task_output
) as mock_execute_sync:
crew.kickoff()
mock_execute_sync.assert_called_once()
_, kwargs = mock_execute_sync.call_args
tools = kwargs["tools"]
assert len(tools) == 2
# Both agents should be available for delegation
delegation_descriptions = " ".join(tool.description for tool in tools)
assert "Agent Alpha" in delegation_descriptions
assert "Agent Beta" in delegation_descriptions
@pytest.mark.vcr()

View File

@@ -614,11 +614,6 @@ def test_handle_streaming_tool_calls_with_error(get_weather_tool_schema, mock_em
def test_handle_streaming_tool_calls_no_available_functions(
get_weather_tool_schema, mock_emit
):
"""When tools are provided but available_functions is not (defaults to None),
the streaming handler should return the accumulated tool calls so the caller
(e.g., CrewAgentExecutor) can handle them. This is the fix for issue #4788
where tool calls were previously discarded and an empty string was returned.
"""
llm = LLM(model="openai/gpt-4o", stream=True, is_litellm=True)
response = llm.call(
messages=[
@@ -626,14 +621,7 @@ def test_handle_streaming_tool_calls_no_available_functions(
],
tools=[get_weather_tool_schema],
)
# With the fix for #4788, tool calls should be returned as a list
# instead of being discarded (previously returned "")
assert isinstance(response, list), (
f"Expected list of tool calls but got {type(response)}: {response}"
)
assert len(response) == 1
assert response[0].function.name == "get_weather"
assert response[0].function.arguments == '{"location":"New York, NY"}'
assert response == ""
assert_event_count(
mock_emit=mock_emit,
@@ -1034,166 +1022,3 @@ async def test_usage_info_streaming_with_acall():
assert llm._token_usage["total_tokens"] > 0
assert len(result) > 0
def test_non_streaming_tool_calls_returned_when_no_available_functions():
"""Test that tool calls are returned (not text) when available_functions is None.
This reproduces the bug from issue #4788 where LLMs like Anthropic return both
text content AND tool calls in the same response. When available_functions=None
(as used by the executor for native tool handling), tool calls should be returned
instead of the text content.
"""
from litellm.types.utils import ChatCompletionMessageToolCall, Function
llm = LLM(model="gpt-4o-mini", is_litellm=True)
# Mock a response that has BOTH text content AND tool calls
mock_tool_call = ChatCompletionMessageToolCall(
id="call_123",
type="function",
function=Function(
name="code_search",
arguments='{"query": "test query"}',
),
)
mock_message = MagicMock()
mock_message.content = "I will search for the given query."
mock_message.tool_calls = [mock_tool_call]
mock_choice = MagicMock()
mock_choice.message = mock_message
mock_response = MagicMock()
mock_response.choices = [mock_choice]
mock_response.usage = MagicMock()
mock_response.usage.prompt_tokens = 10
mock_response.usage.completion_tokens = 5
mock_response.usage.total_tokens = 15
with patch("litellm.completion", return_value=mock_response):
# Call WITHOUT available_functions (as the executor does for native tool handling)
result = llm.call(
messages=[{"role": "user", "content": "Search for something"}],
tools=[{"type": "function", "function": {"name": "code_search"}}],
available_functions=None,
)
# Result should be the tool calls list, NOT the text response
assert isinstance(result, list), (
f"Expected list of tool calls but got {type(result)}: {result}"
)
assert len(result) == 1
assert result[0].function.name == "code_search"
def test_non_streaming_text_returned_when_no_tool_calls():
"""Test that text response is still returned when there are no tool calls."""
llm = LLM(model="gpt-4o-mini", is_litellm=True)
mock_message = MagicMock()
mock_message.content = "The capital of France is Paris."
mock_message.tool_calls = None
mock_choice = MagicMock()
mock_choice.message = mock_message
mock_response = MagicMock()
mock_response.choices = [mock_choice]
mock_response.usage = MagicMock()
mock_response.usage.prompt_tokens = 10
mock_response.usage.completion_tokens = 5
mock_response.usage.total_tokens = 15
with patch("litellm.completion", return_value=mock_response):
result = llm.call(
messages=[{"role": "user", "content": "What is the capital of France?"}],
)
assert isinstance(result, str)
assert result == "The capital of France is Paris."
@pytest.mark.asyncio
async def test_async_non_streaming_tool_calls_returned_when_no_available_functions():
"""Test async path: tool calls are returned (not text) when available_functions is None.
Same bug as #4788 but for the async non-streaming handler.
"""
from litellm.types.utils import ChatCompletionMessageToolCall, Function
llm = LLM(model="gpt-4o-mini", is_litellm=True, stream=False)
mock_tool_call = ChatCompletionMessageToolCall(
id="call_456",
type="function",
function=Function(
name="web_search",
arguments='{"query": "test"}',
),
)
mock_message = MagicMock()
mock_message.content = "I will search the web."
mock_message.tool_calls = [mock_tool_call]
mock_choice = MagicMock()
mock_choice.message = mock_message
mock_response = MagicMock()
mock_response.choices = [mock_choice]
mock_response.usage = MagicMock()
mock_response.usage.prompt_tokens = 10
mock_response.usage.completion_tokens = 5
mock_response.usage.total_tokens = 15
with patch("litellm.acompletion", return_value=mock_response):
result = await llm.acall(
messages=[{"role": "user", "content": "Search for something"}],
tools=[{"type": "function", "function": {"name": "web_search"}}],
available_functions=None,
)
assert isinstance(result, list), (
f"Expected list of tool calls but got {type(result)}: {result}"
)
assert len(result) == 1
assert result[0].function.name == "web_search"
def test_non_streaming_tool_calls_executed_when_available_functions_provided():
"""Test that tool calls are still executed when available_functions IS provided.
This ensures the fix doesn't break the normal tool execution path.
"""
llm = LLM(model="gpt-4o-mini", is_litellm=True)
mock_tool_call = MagicMock()
mock_tool_call.function.name = "get_weather"
mock_tool_call.function.arguments = '{"location": "New York"}'
mock_message = MagicMock()
mock_message.content = "I will check the weather."
mock_message.tool_calls = [mock_tool_call]
mock_choice = MagicMock()
mock_choice.message = mock_message
mock_response = MagicMock()
mock_response.choices = [mock_choice]
mock_response.usage = MagicMock()
mock_response.usage.prompt_tokens = 10
mock_response.usage.completion_tokens = 5
mock_response.usage.total_tokens = 15
def get_weather(location: str) -> str:
return f"Sunny in {location}"
with patch("litellm.completion", return_value=mock_response):
result = llm.call(
messages=[{"role": "user", "content": "What's the weather?"}],
tools=[{"type": "function", "function": {"name": "get_weather"}}],
available_functions={"get_weather": get_weather},
)
# When available_functions is provided, the tool should be executed
assert result == "Sunny in New York"