fix: prevent shared LLM stop words mutation across agents (#5141)

When multiple agents share the same LLM instance, the executor was
directly mutating the shared LLM's stop words list. This caused
cross-agent state pollution where stop words would accumulate across
agents and across multiple crew.kickoff() calls.

Fix: create a shallow copy of the LLM before merging stop words,
so each executor gets its own isolated stop word list. Only copy
when new stop words are actually being added to avoid unnecessary
allocations.

Applied to both CrewAgentExecutor and experimental AgentExecutor.

Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
Devin AI
2026-03-27 14:11:03 +00:00
parent 9fe0c15549
commit efd5c90fd2
4 changed files with 248 additions and 5 deletions

View File

@@ -10,6 +10,7 @@ import asyncio
from collections.abc import Callable
from concurrent.futures import ThreadPoolExecutor, as_completed
import contextvars
import copy
import inspect
import logging
from typing import TYPE_CHECKING, Any, Literal, cast
@@ -162,15 +163,21 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
self.before_llm_call_hooks.extend(get_before_llm_call_hooks())
self.after_llm_call_hooks.extend(get_after_llm_call_hooks())
if self.llm:
# This may be mutating the shared llm object and needs further evaluation
existing_stop = getattr(self.llm, "stop", [])
self.llm.stop = list(
# Create a shallow copy of the LLM to avoid mutating the shared
# instance's stop words. When multiple agents share the same LLM,
# directly mutating stop words causes cross-agent state pollution
# where stop words accumulate across agents. (see #5141)
existing_stop = getattr(self.llm, "stop", []) or []
merged_stop = list(
set(
existing_stop + self.stop
if isinstance(existing_stop, list)
else self.stop
)
)
if merged_stop != (existing_stop if isinstance(existing_stop, list) else []):
self.llm = copy.copy(self.llm)
self.llm.stop = merged_stop
@property
def use_stop_words(self) -> bool:

View File

@@ -4,6 +4,7 @@ import asyncio
from collections.abc import Callable, Coroutine
from concurrent.futures import ThreadPoolExecutor, as_completed
import contextvars
import copy
from datetime import datetime
import inspect
import json
@@ -256,14 +257,21 @@ class AgentExecutor(Flow[AgentExecutorState], CrewAgentExecutorMixin):
self.after_llm_call_hooks.extend(get_after_llm_call_hooks())
if self.llm:
existing_stop = getattr(self.llm, "stop", [])
self.llm.stop = list(
# Create a shallow copy of the LLM to avoid mutating the shared
# instance's stop words. When multiple agents share the same LLM,
# directly mutating stop words causes cross-agent state pollution
# where stop words accumulate across agents. (see #5141)
existing_stop = getattr(self.llm, "stop", []) or []
merged_stop = list(
set(
existing_stop + self.stop
if isinstance(existing_stop, list)
else self.stop
)
)
if merged_stop != (existing_stop if isinstance(existing_stop, list) else []):
self.llm = copy.copy(self.llm)
self.llm.stop = merged_stop
self._state = AgentExecutorState()
# Plan-and-Execute components (Phase 2)

View File

@@ -2421,3 +2421,84 @@ def test_agent_mcps_accepts_legacy_prefix_with_tool():
mcps=["crewai-amp:notion#get_page"],
)
assert agent.mcps == ["crewai-amp:notion#get_page"]
class TestCrewAgentExecutorSharedLLMStopWords:
    """Regression tests for shared LLM stop-word mutation in CrewAgentExecutor (issue #5141).

    When multiple agents share the same LLM instance, each CrewAgentExecutor
    should NOT mutate the shared LLM's stop words.
    """

    def _make_executor(self, llm, stop_words):
        """Helper to create a CrewAgentExecutor with minimal deps."""
        from crewai.agents.tools_handler import ToolsHandler

        test_agent = Agent(
            role="test role",
            goal="test goal",
            backstory="test backstory",
        )
        test_task = Task(
            description="Test task",
            expected_output="Test output",
            agent=test_agent,
        )
        return CrewAgentExecutor(
            agent=test_agent,
            task=test_task,
            llm=llm,
            crew=None,
            prompt={"prompt": "Test {input} {tool_names} {tools}"},
            max_iter=5,
            tools=[],
            tools_names="",
            stop_words=stop_words,
            tools_description="",
            tools_handler=ToolsHandler(),
        )

    def test_shared_llm_not_mutated(self):
        """Creating a CrewAgentExecutor should NOT mutate the shared LLM's stop words."""
        shared = LLM(model="gpt-4", stop=["Original:"])
        snapshot = list(shared.stop)
        self._make_executor(shared, stop_words=["Observation:"])
        assert shared.stop == snapshot

    def test_multiple_executors_isolate_stop_words(self):
        """Multiple executors sharing an LLM should each have isolated stop words."""
        shared = LLM(model="gpt-4", stop=["Original:"])
        snapshot = list(shared.stop)
        first = self._make_executor(shared, stop_words=["StopA:"])
        second = self._make_executor(shared, stop_words=["StopB:"])
        # The shared instance itself must be left untouched.
        assert shared.stop == snapshot
        # Each executor must hold its own distinct LLM copy.
        assert first.llm is not shared
        assert second.llm is not shared
        assert first.llm is not second.llm
        # first merges only its own stop word with the original.
        assert "Original:" in first.llm.stop
        assert "StopA:" in first.llm.stop
        assert "StopB:" not in first.llm.stop
        # second merges only its own stop word with the original.
        assert "Original:" in second.llm.stop
        assert "StopB:" in second.llm.stop
        assert "StopA:" not in second.llm.stop

    def test_stop_words_do_not_accumulate_across_kickoffs(self):
        """Simulating multiple kickoffs: stop words must not grow on the shared LLM."""
        shared = LLM(model="gpt-4", stop=["Original:"])
        snapshot = list(shared.stop)
        for _attempt in range(5):
            self._make_executor(shared, stop_words=["Observation:"])
        assert shared.stop == snapshot

View File

@@ -2002,3 +2002,150 @@ class TestVisionImageFormatContract:
assert hasattr(AnthropicCompletion, "_convert_image_blocks"), (
"Anthropic provider must have _convert_image_blocks for auto-conversion"
)
class TestSharedLLMStopWordsMutation:
    """Regression tests for shared LLM stop words mutation (issue #5141).

    When multiple agents share the same LLM instance, each executor should
    NOT mutate the shared LLM's stop words. Instead, each executor should
    get its own copy of the LLM with the merged stop words.
    """

    @pytest.fixture
    def shared_llm(self):
        """Create a shared mock LLM with initial stop words."""
        mock_llm = Mock()
        mock_llm.supports_stop_words.return_value = True
        mock_llm.stop = ["Original:"]
        return mock_llm

    @pytest.fixture
    def base_deps(self, shared_llm):
        """Create base dependencies using the shared LLM."""
        mock_task = Mock()
        mock_task.description = "Test task"
        mock_crew = Mock()
        mock_crew.verbose = False
        mock_crew._train = False
        return {
            "llm": shared_llm,
            "task": mock_task,
            "crew": mock_crew,
            "prompt": {"prompt": "Test {input}"},
            "max_iter": 10,
            "tools": [],
            "tools_names": "",
            "tools_description": "",
            "tools_handler": Mock(),
        }

    @staticmethod
    def _stub_agent(agent_id, role, key):
        """Build a minimal mock agent carrying the attributes the executor reads."""
        stub = Mock()
        stub.id = agent_id
        stub.role = role
        stub.verbose = False
        stub.key = key
        return stub

    def test_shared_llm_not_mutated_by_executor(self, shared_llm, base_deps):
        """Creating an executor should NOT mutate the shared LLM's stop words."""
        snapshot = list(shared_llm.stop)
        AgentExecutor(
            **base_deps,
            agent=self._stub_agent("agent-1", "Agent 1", "key-1"),
            stop_words=["Observation:"],
        )
        # The shared LLM's stop words must remain unchanged
        assert shared_llm.stop == snapshot

    def test_multiple_executors_do_not_accumulate_stop_words(
        self, shared_llm, base_deps
    ):
        """Multiple executors sharing an LLM should not accumulate stop words."""
        snapshot = list(shared_llm.stop)
        exec_a = AgentExecutor(
            **base_deps,
            agent=self._stub_agent("agent-a", "Agent A", "key-a"),
            stop_words=["StopA:"],
        )
        exec_b = AgentExecutor(
            **base_deps,
            agent=self._stub_agent("agent-b", "Agent B", "key-b"),
            stop_words=["StopB:"],
        )
        # Shared LLM must be unmodified
        assert shared_llm.stop == snapshot
        # Each executor should have its own LLM copy with the correct merged stop words
        assert exec_a.llm is not shared_llm
        assert exec_b.llm is not shared_llm
        assert exec_a.llm is not exec_b.llm
        # exec_a should have Original: + StopA:
        assert "Original:" in exec_a.llm.stop
        assert "StopA:" in exec_a.llm.stop
        assert "StopB:" not in exec_a.llm.stop
        # exec_b should have Original: + StopB:
        assert "Original:" in exec_b.llm.stop
        assert "StopB:" in exec_b.llm.stop
        assert "StopA:" not in exec_b.llm.stop

    def test_executor_no_copy_when_no_new_stop_words(self, shared_llm, base_deps):
        """Executor should not copy LLM when stop_words don't add anything new."""
        # The shared LLM already has ["Original:"], so an empty stop_words list
        # adds nothing and the executor should keep the very same object.
        executor = AgentExecutor(
            **base_deps,
            agent=self._stub_agent("agent-1", "Agent 1", "key-1"),
            stop_words=[],  # No new stop words
        )
        assert executor.llm is shared_llm

    def test_stop_words_persist_across_multiple_kickoffs(self, shared_llm, base_deps):
        """Stop words should not accumulate across multiple executor creations
        (simulating multiple crew.kickoff() calls)."""
        snapshot = list(shared_llm.stop)
        for idx in range(5):
            AgentExecutor(
                **base_deps,
                agent=self._stub_agent(f"agent-{idx}", f"Agent {idx}", f"key-{idx}"),
                stop_words=["Observation:"],
            )
        # After 5 executor creations, the shared LLM's stop words must be unchanged
        assert shared_llm.stop == snapshot