fix: prevent shared LLM stop words mutation across agents (#5141)

When multiple agents share the same LLM instance, the executor was
directly mutating the shared LLM's stop words list. This caused
cross-agent state pollution where stop words would accumulate across
agents and across multiple crew.kickoff() calls.

Fix: create a shallow copy of the LLM before merging stop words,
so each executor gets its own isolated stop word list. Only copy
when new stop words are actually being added to avoid unnecessary
allocations.

Applied to both CrewAgentExecutor and experimental AgentExecutor.

Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
Devin AI
2026-03-27 14:11:03 +00:00
parent 9fe0c15549
commit efd5c90fd2
4 changed files with 248 additions and 5 deletions

View File

@@ -10,6 +10,7 @@ import asyncio
from collections.abc import Callable
from concurrent.futures import ThreadPoolExecutor, as_completed
import contextvars
import copy
import inspect
import logging
from typing import TYPE_CHECKING, Any, Literal, cast
@@ -162,15 +163,21 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
self.before_llm_call_hooks.extend(get_before_llm_call_hooks())
self.after_llm_call_hooks.extend(get_after_llm_call_hooks())
if self.llm:
# This may be mutating the shared llm object and needs further evaluation
existing_stop = getattr(self.llm, "stop", [])
self.llm.stop = list(
# Create a shallow copy of the LLM to avoid mutating the shared
# instance's stop words. When multiple agents share the same LLM,
# directly mutating stop words causes cross-agent state pollution
# where stop words accumulate across agents. (see #5141)
existing_stop = getattr(self.llm, "stop", []) or []
merged_stop = list(
set(
existing_stop + self.stop
if isinstance(existing_stop, list)
else self.stop
)
)
if merged_stop != (existing_stop if isinstance(existing_stop, list) else []):
self.llm = copy.copy(self.llm)
self.llm.stop = merged_stop
@property
def use_stop_words(self) -> bool:

View File

@@ -4,6 +4,7 @@ import asyncio
from collections.abc import Callable, Coroutine
from concurrent.futures import ThreadPoolExecutor, as_completed
import contextvars
import copy
from datetime import datetime
import inspect
import json
@@ -256,14 +257,21 @@ class AgentExecutor(Flow[AgentExecutorState], CrewAgentExecutorMixin):
self.after_llm_call_hooks.extend(get_after_llm_call_hooks())
if self.llm:
existing_stop = getattr(self.llm, "stop", [])
self.llm.stop = list(
# Create a shallow copy of the LLM to avoid mutating the shared
# instance's stop words. When multiple agents share the same LLM,
# directly mutating stop words causes cross-agent state pollution
# where stop words accumulate across agents. (see #5141)
existing_stop = getattr(self.llm, "stop", []) or []
merged_stop = list(
set(
existing_stop + self.stop
if isinstance(existing_stop, list)
else self.stop
)
)
if merged_stop != (existing_stop if isinstance(existing_stop, list) else []):
self.llm = copy.copy(self.llm)
self.llm.stop = merged_stop
self._state = AgentExecutorState()
# Plan-and-Execute components (Phase 2)

View File

@@ -2421,3 +2421,84 @@ def test_agent_mcps_accepts_legacy_prefix_with_tool():
mcps=["crewai-amp:notion#get_page"],
)
assert agent.mcps == ["crewai-amp:notion#get_page"]
class TestCrewAgentExecutorSharedLLMStopWords:
    """Regression tests for shared LLM stop-word mutation in CrewAgentExecutor (issue #5141).

    When multiple agents share the same LLM instance, each CrewAgentExecutor
    should NOT mutate the shared LLM's stop words.
    """

    def _make_executor(self, llm, stop_words):
        """Helper to create a CrewAgentExecutor with minimal deps."""
        from crewai.agents.tools_handler import ToolsHandler

        test_agent = Agent(
            role="test role",
            goal="test goal",
            backstory="test backstory",
        )
        test_task = Task(
            description="Test task",
            expected_output="Test output",
            agent=test_agent,
        )
        return CrewAgentExecutor(
            agent=test_agent,
            task=test_task,
            llm=llm,
            crew=None,
            prompt={"prompt": "Test {input} {tool_names} {tools}"},
            max_iter=5,
            tools=[],
            tools_names="",
            stop_words=stop_words,
            tools_description="",
            tools_handler=ToolsHandler(),
        )

    def test_shared_llm_not_mutated(self):
        """Creating a CrewAgentExecutor should NOT mutate the shared LLM's stop words."""
        shared = LLM(model="gpt-4", stop=["Original:"])
        snapshot = list(shared.stop)
        self._make_executor(shared, stop_words=["Observation:"])
        assert shared.stop == snapshot

    def test_multiple_executors_isolate_stop_words(self):
        """Multiple executors sharing an LLM should each have isolated stop words."""
        shared = LLM(model="gpt-4", stop=["Original:"])
        snapshot = list(shared.stop)
        first = self._make_executor(shared, stop_words=["StopA:"])
        second = self._make_executor(shared, stop_words=["StopB:"])
        # The shared instance itself must be left untouched.
        assert shared.stop == snapshot
        # Each executor must hold its own distinct LLM copy.
        assert first.llm is not shared
        assert second.llm is not shared
        assert first.llm is not second.llm
        # first merges only its own stop word with the original.
        assert "Original:" in first.llm.stop
        assert "StopA:" in first.llm.stop
        assert "StopB:" not in first.llm.stop
        # second merges only its own stop word with the original.
        assert "Original:" in second.llm.stop
        assert "StopB:" in second.llm.stop
        assert "StopA:" not in second.llm.stop

    def test_stop_words_do_not_accumulate_across_kickoffs(self):
        """Simulating multiple kickoffs: stop words must not grow on the shared LLM."""
        shared = LLM(model="gpt-4", stop=["Original:"])
        snapshot = list(shared.stop)
        for _attempt in range(5):
            self._make_executor(shared, stop_words=["Observation:"])
        assert shared.stop == snapshot

View File

@@ -2002,3 +2002,150 @@ class TestVisionImageFormatContract:
assert hasattr(AnthropicCompletion, "_convert_image_blocks"), (
"Anthropic provider must have _convert_image_blocks for auto-conversion"
)
class TestSharedLLMStopWordsMutation:
    """Regression tests for shared LLM stop words mutation (issue #5141).

    When multiple agents share the same LLM instance, each executor should
    NOT mutate the shared LLM's stop words. Instead, each executor should
    get its own copy of the LLM with the merged stop words.
    """

    @pytest.fixture
    def shared_llm(self):
        """Create a shared mock LLM with initial stop words."""
        mock_llm = Mock()
        mock_llm.supports_stop_words.return_value = True
        mock_llm.stop = ["Original:"]
        return mock_llm

    @pytest.fixture
    def base_deps(self, shared_llm):
        """Create base dependencies using the shared LLM."""
        mock_task = Mock()
        mock_task.description = "Test task"
        mock_crew = Mock()
        mock_crew.verbose = False
        mock_crew._train = False
        return {
            "llm": shared_llm,
            "task": mock_task,
            "crew": mock_crew,
            "prompt": {"prompt": "Test {input}"},
            "max_iter": 10,
            "tools": [],
            "tools_names": "",
            "tools_description": "",
            "tools_handler": Mock(),
        }

    @staticmethod
    def _stub_agent(agent_id, role, key):
        """Build a minimal mock agent carrying the attributes the executor reads."""
        stub = Mock()
        stub.id = agent_id
        stub.role = role
        stub.verbose = False
        stub.key = key
        return stub

    def test_shared_llm_not_mutated_by_executor(self, shared_llm, base_deps):
        """Creating an executor should NOT mutate the shared LLM's stop words."""
        snapshot = list(shared_llm.stop)
        AgentExecutor(
            **base_deps,
            agent=self._stub_agent("agent-1", "Agent 1", "key-1"),
            stop_words=["Observation:"],
        )
        # The shared LLM's stop words must remain unchanged
        assert shared_llm.stop == snapshot

    def test_multiple_executors_do_not_accumulate_stop_words(
        self, shared_llm, base_deps
    ):
        """Multiple executors sharing an LLM should not accumulate stop words."""
        snapshot = list(shared_llm.stop)
        exec_a = AgentExecutor(
            **base_deps,
            agent=self._stub_agent("agent-a", "Agent A", "key-a"),
            stop_words=["StopA:"],
        )
        exec_b = AgentExecutor(
            **base_deps,
            agent=self._stub_agent("agent-b", "Agent B", "key-b"),
            stop_words=["StopB:"],
        )
        # Shared LLM must be unmodified
        assert shared_llm.stop == snapshot
        # Each executor should have its own LLM copy with the correct merged stop words
        assert exec_a.llm is not shared_llm
        assert exec_b.llm is not shared_llm
        assert exec_a.llm is not exec_b.llm
        # exec_a should have Original: + StopA:
        assert "Original:" in exec_a.llm.stop
        assert "StopA:" in exec_a.llm.stop
        assert "StopB:" not in exec_a.llm.stop
        # exec_b should have Original: + StopB:
        assert "Original:" in exec_b.llm.stop
        assert "StopB:" in exec_b.llm.stop
        assert "StopA:" not in exec_b.llm.stop

    def test_executor_no_copy_when_no_new_stop_words(self, shared_llm, base_deps):
        """Executor should not copy LLM when stop_words don't add anything new."""
        # The shared LLM already has ["Original:"], so an empty stop_words list
        # adds nothing and the executor should keep the very same object.
        executor = AgentExecutor(
            **base_deps,
            agent=self._stub_agent("agent-1", "Agent 1", "key-1"),
            stop_words=[],  # No new stop words
        )
        assert executor.llm is shared_llm

    def test_stop_words_persist_across_multiple_kickoffs(self, shared_llm, base_deps):
        """Stop words should not accumulate across multiple executor creations
        (simulating multiple crew.kickoff() calls)."""
        snapshot = list(shared_llm.stop)
        for idx in range(5):
            AgentExecutor(
                **base_deps,
                agent=self._stub_agent(f"agent-{idx}", f"Agent {idx}", f"key-{idx}"),
                stop_words=["Observation:"],
            )
        # After 5 executor creations, the shared LLM's stop words must be unchanged
        assert shared_llm.stop == snapshot