From efd5c90fd2dde857ad141b3f2cd83c4b01e443b5 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 27 Mar 2026 14:11:03 +0000 Subject: [PATCH] fix: prevent shared LLM stop words mutation across agents (#5141) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When multiple agents share the same LLM instance, the executor was directly mutating the shared LLM's stop words list. This caused cross-agent state pollution where stop words would accumulate across agents and across multiple crew.kickoff() calls. Fix: create a shallow copy of the LLM before merging stop words, so each executor gets its own isolated stop word list. Only copy when new stop words are actually being added to avoid unnecessary allocations. Applied to both CrewAgentExecutor and experimental AgentExecutor. Co-Authored-By: João --- .../src/crewai/agents/crew_agent_executor.py | 13 +- .../src/crewai/experimental/agent_executor.py | 12 +- lib/crewai/tests/agents/test_agent.py | 81 ++++++++++ .../tests/agents/test_agent_executor.py | 147 ++++++++++++++++++ 4 files changed, 248 insertions(+), 5 deletions(-) diff --git a/lib/crewai/src/crewai/agents/crew_agent_executor.py b/lib/crewai/src/crewai/agents/crew_agent_executor.py index 0707f59d6..9e36b4d91 100644 --- a/lib/crewai/src/crewai/agents/crew_agent_executor.py +++ b/lib/crewai/src/crewai/agents/crew_agent_executor.py @@ -10,6 +10,7 @@ import asyncio from collections.abc import Callable from concurrent.futures import ThreadPoolExecutor, as_completed import contextvars +import copy import inspect import logging from typing import TYPE_CHECKING, Any, Literal, cast @@ -162,15 +163,21 @@ class CrewAgentExecutor(CrewAgentExecutorMixin): self.before_llm_call_hooks.extend(get_before_llm_call_hooks()) self.after_llm_call_hooks.extend(get_after_llm_call_hooks()) if self.llm: - # This may be mutating the shared llm object and needs further evaluation - 
existing_stop = getattr(self.llm, "stop", [])
-            self.llm.stop = list(
+            # Create a shallow copy of the LLM to avoid mutating the shared
+            # instance's stop words. When multiple agents share the same LLM,
+            # directly mutating stop words causes cross-agent state pollution
+            # where stop words accumulate across agents. (see #5141)
+            existing_stop = getattr(self.llm, "stop", []) or []
+            merged_stop = list(
                 set(
                     existing_stop + self.stop
                     if isinstance(existing_stop, list)
                     else self.stop
                 )
             )
+            if set(merged_stop) != set(existing_stop if isinstance(existing_stop, list) else []):
+                self.llm = copy.copy(self.llm)
+                self.llm.stop = merged_stop
 
     @property
     def use_stop_words(self) -> bool:
diff --git a/lib/crewai/src/crewai/experimental/agent_executor.py b/lib/crewai/src/crewai/experimental/agent_executor.py
index b785a102e..3e1e859ae 100644
--- a/lib/crewai/src/crewai/experimental/agent_executor.py
+++ b/lib/crewai/src/crewai/experimental/agent_executor.py
@@ -4,6 +4,7 @@ import asyncio
 from collections.abc import Callable, Coroutine
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import contextvars
+import copy
 from datetime import datetime
 import inspect
 import json
@@ -256,14 +257,21 @@
         self.after_llm_call_hooks.extend(get_after_llm_call_hooks())
 
         if self.llm:
-            existing_stop = getattr(self.llm, "stop", [])
-            self.llm.stop = list(
+            # Create a shallow copy of the LLM to avoid mutating the shared
+            # instance's stop words. When multiple agents share the same LLM,
+            # directly mutating stop words causes cross-agent state pollution
+            # where stop words accumulate across agents. 
(see #5141)
+            existing_stop = getattr(self.llm, "stop", []) or []
+            merged_stop = list(
                 set(
                     existing_stop + self.stop
                     if isinstance(existing_stop, list)
                     else self.stop
                 )
             )
+            if set(merged_stop) != set(existing_stop if isinstance(existing_stop, list) else []):
+                self.llm = copy.copy(self.llm)
+                self.llm.stop = merged_stop
         self._state = AgentExecutorState()
 
         # Plan-and-Execute components (Phase 2)
diff --git a/lib/crewai/tests/agents/test_agent.py b/lib/crewai/tests/agents/test_agent.py
index d865ec541..38cca1c87 100644
--- a/lib/crewai/tests/agents/test_agent.py
+++ b/lib/crewai/tests/agents/test_agent.py
@@ -2421,3 +2421,84 @@ def test_agent_mcps_accepts_legacy_prefix_with_tool():
         mcps=["crewai-amp:notion#get_page"],
     )
     assert agent.mcps == ["crewai-amp:notion#get_page"]
+
+
+class TestCrewAgentExecutorSharedLLMStopWords:
+    """Regression tests for shared LLM stop words mutation in CrewAgentExecutor (issue #5141).
+
+    When multiple agents share the same LLM instance, each CrewAgentExecutor
+    should NOT mutate the shared LLM's stop words. 
+ """ + + def _make_executor(self, llm, stop_words): + """Helper to create a CrewAgentExecutor with minimal deps.""" + from crewai.agents.tools_handler import ToolsHandler + + agent = Agent( + role="test role", + goal="test goal", + backstory="test backstory", + ) + task = Task( + description="Test task", + expected_output="Test output", + agent=agent, + ) + return CrewAgentExecutor( + agent=agent, + task=task, + llm=llm, + crew=None, + prompt={"prompt": "Test {input} {tool_names} {tools}"}, + max_iter=5, + tools=[], + tools_names="", + stop_words=stop_words, + tools_description="", + tools_handler=ToolsHandler(), + ) + + def test_shared_llm_not_mutated(self): + """Creating a CrewAgentExecutor should NOT mutate the shared LLM's stop words.""" + shared_llm = LLM(model="gpt-4", stop=["Original:"]) + original_stop = list(shared_llm.stop) + + self._make_executor(shared_llm, stop_words=["Observation:"]) + + assert shared_llm.stop == original_stop + + def test_multiple_executors_isolate_stop_words(self): + """Multiple executors sharing an LLM should each have isolated stop words.""" + shared_llm = LLM(model="gpt-4", stop=["Original:"]) + original_stop = list(shared_llm.stop) + + exec_a = self._make_executor(shared_llm, stop_words=["StopA:"]) + exec_b = self._make_executor(shared_llm, stop_words=["StopB:"]) + + # Shared LLM must be unmodified + assert shared_llm.stop == original_stop + + # Each executor should have its own LLM copy + assert exec_a.llm is not shared_llm + assert exec_b.llm is not shared_llm + assert exec_a.llm is not exec_b.llm + + # exec_a should have Original: + StopA: only + assert "Original:" in exec_a.llm.stop + assert "StopA:" in exec_a.llm.stop + assert "StopB:" not in exec_a.llm.stop + + # exec_b should have Original: + StopB: only + assert "Original:" in exec_b.llm.stop + assert "StopB:" in exec_b.llm.stop + assert "StopA:" not in exec_b.llm.stop + + def test_stop_words_do_not_accumulate_across_kickoffs(self): + """Simulating multiple kickoffs: 
stop words must not grow on the shared LLM.""" + shared_llm = LLM(model="gpt-4", stop=["Original:"]) + original_stop = list(shared_llm.stop) + + for _ in range(5): + self._make_executor(shared_llm, stop_words=["Observation:"]) + + assert shared_llm.stop == original_stop diff --git a/lib/crewai/tests/agents/test_agent_executor.py b/lib/crewai/tests/agents/test_agent_executor.py index c845bd458..ad29e3e46 100644 --- a/lib/crewai/tests/agents/test_agent_executor.py +++ b/lib/crewai/tests/agents/test_agent_executor.py @@ -2002,3 +2002,150 @@ class TestVisionImageFormatContract: assert hasattr(AnthropicCompletion, "_convert_image_blocks"), ( "Anthropic provider must have _convert_image_blocks for auto-conversion" ) + + +class TestSharedLLMStopWordsMutation: + """Regression tests for shared LLM stop words mutation (issue #5141). + + When multiple agents share the same LLM instance, each executor should + NOT mutate the shared LLM's stop words. Instead, each executor should + get its own copy of the LLM with the merged stop words. 
+ """ + + @pytest.fixture + def shared_llm(self): + """Create a shared mock LLM with initial stop words.""" + llm = Mock() + llm.supports_stop_words.return_value = True + llm.stop = ["Original:"] + return llm + + @pytest.fixture + def base_deps(self, shared_llm): + """Create base dependencies using the shared LLM.""" + task = Mock() + task.description = "Test task" + + crew = Mock() + crew.verbose = False + crew._train = False + + prompt = {"prompt": "Test {input}"} + + return { + "llm": shared_llm, + "task": task, + "crew": crew, + "prompt": prompt, + "max_iter": 10, + "tools": [], + "tools_names": "", + "tools_description": "", + "tools_handler": Mock(), + } + + def test_shared_llm_not_mutated_by_executor(self, shared_llm, base_deps): + """Creating an executor should NOT mutate the shared LLM's stop words.""" + original_stop = list(shared_llm.stop) + + agent = Mock() + agent.id = "agent-1" + agent.role = "Agent 1" + agent.verbose = False + agent.key = "key-1" + + AgentExecutor( + **base_deps, + agent=agent, + stop_words=["Observation:"], + ) + + # The shared LLM's stop words must remain unchanged + assert shared_llm.stop == original_stop + + def test_multiple_executors_do_not_accumulate_stop_words( + self, shared_llm, base_deps + ): + """Multiple executors sharing an LLM should not accumulate stop words.""" + original_stop = list(shared_llm.stop) + + agent_a = Mock() + agent_a.id = "agent-a" + agent_a.role = "Agent A" + agent_a.verbose = False + agent_a.key = "key-a" + + agent_b = Mock() + agent_b.id = "agent-b" + agent_b.role = "Agent B" + agent_b.verbose = False + agent_b.key = "key-b" + + exec_a = AgentExecutor( + **base_deps, + agent=agent_a, + stop_words=["StopA:"], + ) + + exec_b = AgentExecutor( + **base_deps, + agent=agent_b, + stop_words=["StopB:"], + ) + + # Shared LLM must be unmodified + assert shared_llm.stop == original_stop + + # Each executor should have its own LLM copy with the correct merged stop words + assert exec_a.llm is not shared_llm + 
assert exec_b.llm is not shared_llm + assert exec_a.llm is not exec_b.llm + + # exec_a should have Original: + StopA: + assert "Original:" in exec_a.llm.stop + assert "StopA:" in exec_a.llm.stop + assert "StopB:" not in exec_a.llm.stop + + # exec_b should have Original: + StopB: + assert "Original:" in exec_b.llm.stop + assert "StopB:" in exec_b.llm.stop + assert "StopA:" not in exec_b.llm.stop + + def test_executor_no_copy_when_no_new_stop_words(self, shared_llm, base_deps): + """Executor should not copy LLM when stop_words don't add anything new.""" + # The shared LLM already has ["Original:"] + agent = Mock() + agent.id = "agent-1" + agent.role = "Agent 1" + agent.verbose = False + agent.key = "key-1" + + executor = AgentExecutor( + **base_deps, + agent=agent, + stop_words=[], # No new stop words + ) + + # When no new stop words are added, the LLM should not be copied + assert executor.llm is shared_llm + + def test_stop_words_persist_across_multiple_kickoffs(self, shared_llm, base_deps): + """Stop words should not accumulate across multiple executor creations + (simulating multiple crew.kickoff() calls).""" + original_stop = list(shared_llm.stop) + + for i in range(5): + agent = Mock() + agent.id = f"agent-{i}" + agent.role = f"Agent {i}" + agent.verbose = False + agent.key = f"key-{i}" + + AgentExecutor( + **base_deps, + agent=agent, + stop_words=["Observation:"], + ) + + # After 5 executor creations, the shared LLM's stop words must be unchanged + assert shared_llm.stop == original_stop