diff --git a/lib/crewai/src/crewai/agents/crew_agent_executor.py b/lib/crewai/src/crewai/agents/crew_agent_executor.py index 0707f59d6..9e36b4d91 100644 --- a/lib/crewai/src/crewai/agents/crew_agent_executor.py +++ b/lib/crewai/src/crewai/agents/crew_agent_executor.py @@ -10,6 +10,7 @@ import asyncio from collections.abc import Callable from concurrent.futures import ThreadPoolExecutor, as_completed import contextvars +import copy import inspect import logging from typing import TYPE_CHECKING, Any, Literal, cast @@ -162,15 +163,21 @@ class CrewAgentExecutor(CrewAgentExecutorMixin): self.before_llm_call_hooks.extend(get_before_llm_call_hooks()) self.after_llm_call_hooks.extend(get_after_llm_call_hooks()) if self.llm: - # This may be mutating the shared llm object and needs further evaluation - existing_stop = getattr(self.llm, "stop", []) - self.llm.stop = list( + # Create a shallow copy of the LLM to avoid mutating the shared + # instance's stop words. When multiple agents share the same LLM, + # directly mutating stop words causes cross-agent state pollution + # where stop words accumulate across agents. (see #5141) + existing_stop = getattr(self.llm, "stop", []) or [] + merged_stop = list( set( existing_stop + self.stop if isinstance(existing_stop, list) else self.stop ) ) + if merged_stop != (existing_stop if isinstance(existing_stop, list) else []): + self.llm = copy.copy(self.llm) + self.llm.stop = merged_stop @property def use_stop_words(self) -> bool: diff --git a/lib/crewai/src/crewai/experimental/agent_executor.py b/lib/crewai/src/crewai/experimental/agent_executor.py index b785a102e..3e1e859ae 100644 --- a/lib/crewai/src/crewai/experimental/agent_executor.py +++ b/lib/crewai/src/crewai/experimental/agent_executor.py @@ -4,6 +4,7 @@ import asyncio from collections.abc import Callable, Coroutine from concurrent.futures import ThreadPoolExecutor, as_completed import contextvars +import copy from datetime import datetime import inspect import json @@ -256,14 +257,21 @@ class AgentExecutor(Flow[AgentExecutorState], CrewAgentExecutorMixin): self.after_llm_call_hooks.extend(get_after_llm_call_hooks()) if self.llm: - existing_stop = getattr(self.llm, "stop", []) - self.llm.stop = list( + # Create a shallow copy of the LLM to avoid mutating the shared + # instance's stop words. When multiple agents share the same LLM, + # directly mutating stop words causes cross-agent state pollution + # where stop words accumulate across agents. (see #5141) + existing_stop = getattr(self.llm, "stop", []) or [] + merged_stop = list( set( existing_stop + self.stop if isinstance(existing_stop, list) else self.stop ) ) + if merged_stop != (existing_stop if isinstance(existing_stop, list) else []): + self.llm = copy.copy(self.llm) + self.llm.stop = merged_stop self._state = AgentExecutorState() # Plan-and-Execute components (Phase 2) diff --git a/lib/crewai/tests/agents/test_agent.py b/lib/crewai/tests/agents/test_agent.py index d865ec541..38cca1c87 100644 --- a/lib/crewai/tests/agents/test_agent.py +++ b/lib/crewai/tests/agents/test_agent.py @@ -2421,3 +2421,84 @@ def test_agent_mcps_accepts_legacy_prefix_with_tool(): mcps=["crewai-amp:notion#get_page"], ) assert agent.mcps == ["crewai-amp:notion#get_page"] + + +class TestCrewAgentExecutorSharedLLMStopWords: + """Regression tests for shared LLM stop words mutation in CrewAgentExecutor (issue #5141). + + When multiple agents share the same LLM instance, each CrewAgentExecutor + should NOT mutate the shared LLM's stop words. + """ + + def _make_executor(self, llm, stop_words): + """Helper to create a CrewAgentExecutor with minimal deps.""" + from crewai.agents.tools_handler import ToolsHandler + + agent = Agent( + role="test role", + goal="test goal", + backstory="test backstory", + ) + task = Task( + description="Test task", + expected_output="Test output", + agent=agent, + ) + return CrewAgentExecutor( + agent=agent, + task=task, + llm=llm, + crew=None, + prompt={"prompt": "Test {input} {tool_names} {tools}"}, + max_iter=5, + tools=[], + tools_names="", + stop_words=stop_words, + tools_description="", + tools_handler=ToolsHandler(), + ) + + def test_shared_llm_not_mutated(self): + """Creating a CrewAgentExecutor should NOT mutate the shared LLM's stop words.""" + shared_llm = LLM(model="gpt-4", stop=["Original:"]) + original_stop = list(shared_llm.stop) + + self._make_executor(shared_llm, stop_words=["Observation:"]) + + assert shared_llm.stop == original_stop + + def test_multiple_executors_isolate_stop_words(self): + """Multiple executors sharing an LLM should each have isolated stop words.""" + shared_llm = LLM(model="gpt-4", stop=["Original:"]) + original_stop = list(shared_llm.stop) + + exec_a = self._make_executor(shared_llm, stop_words=["StopA:"]) + exec_b = self._make_executor(shared_llm, stop_words=["StopB:"]) + + # Shared LLM must be unmodified + assert shared_llm.stop == original_stop + + # Each executor should have its own LLM copy + assert exec_a.llm is not shared_llm + assert exec_b.llm is not shared_llm + assert exec_a.llm is not exec_b.llm + + # exec_a should have Original: + StopA: only + assert "Original:" in exec_a.llm.stop + assert "StopA:" in exec_a.llm.stop + assert "StopB:" not in exec_a.llm.stop + + # exec_b should have Original: + StopB: only + assert "Original:" in exec_b.llm.stop + assert "StopB:" in exec_b.llm.stop + assert "StopA:" not in exec_b.llm.stop + + def test_stop_words_do_not_accumulate_across_kickoffs(self): + """Simulating multiple kickoffs: stop words must not grow on the shared LLM.""" + shared_llm = LLM(model="gpt-4", stop=["Original:"]) + original_stop = list(shared_llm.stop) + + for _ in range(5): + self._make_executor(shared_llm, stop_words=["Observation:"]) + + assert shared_llm.stop == original_stop diff --git a/lib/crewai/tests/agents/test_agent_executor.py b/lib/crewai/tests/agents/test_agent_executor.py index c845bd458..ad29e3e46 100644 --- a/lib/crewai/tests/agents/test_agent_executor.py +++ b/lib/crewai/tests/agents/test_agent_executor.py @@ -2002,3 +2002,150 @@ class TestVisionImageFormatContract: assert hasattr(AnthropicCompletion, "_convert_image_blocks"), ( "Anthropic provider must have _convert_image_blocks for auto-conversion" ) + + +class TestSharedLLMStopWordsMutation: + """Regression tests for shared LLM stop words mutation (issue #5141). + + When multiple agents share the same LLM instance, each executor should + NOT mutate the shared LLM's stop words. Instead, each executor should + get its own copy of the LLM with the merged stop words. + """ + + @pytest.fixture + def shared_llm(self): + """Create a shared mock LLM with initial stop words.""" + llm = Mock() + llm.supports_stop_words.return_value = True + llm.stop = ["Original:"] + return llm + + @pytest.fixture + def base_deps(self, shared_llm): + """Create base dependencies using the shared LLM.""" + task = Mock() + task.description = "Test task" + + crew = Mock() + crew.verbose = False + crew._train = False + + prompt = {"prompt": "Test {input}"} + + return { + "llm": shared_llm, + "task": task, + "crew": crew, + "prompt": prompt, + "max_iter": 10, + "tools": [], + "tools_names": "", + "tools_description": "", + "tools_handler": Mock(), + } + + def test_shared_llm_not_mutated_by_executor(self, shared_llm, base_deps): + """Creating an executor should NOT mutate the shared LLM's stop words.""" + original_stop = list(shared_llm.stop) + + agent = Mock() + agent.id = "agent-1" + agent.role = "Agent 1" + agent.verbose = False + agent.key = "key-1" + + AgentExecutor( + **base_deps, + agent=agent, + stop_words=["Observation:"], + ) + + # The shared LLM's stop words must remain unchanged + assert shared_llm.stop == original_stop + + def test_multiple_executors_do_not_accumulate_stop_words( + self, shared_llm, base_deps + ): + """Multiple executors sharing an LLM should not accumulate stop words.""" + original_stop = list(shared_llm.stop) + + agent_a = Mock() + agent_a.id = "agent-a" + agent_a.role = "Agent A" + agent_a.verbose = False + agent_a.key = "key-a" + + agent_b = Mock() + agent_b.id = "agent-b" + agent_b.role = "Agent B" + agent_b.verbose = False + agent_b.key = "key-b" + + exec_a = AgentExecutor( + **base_deps, + agent=agent_a, + stop_words=["StopA:"], + ) + + exec_b = AgentExecutor( + **base_deps, + agent=agent_b, + stop_words=["StopB:"], + ) + + # Shared LLM must be unmodified + assert shared_llm.stop == original_stop + + # Each executor should have its own LLM copy with the correct merged stop words + assert exec_a.llm is not shared_llm + assert exec_b.llm is not shared_llm + assert exec_a.llm is not exec_b.llm + + # exec_a should have Original: + StopA: + assert "Original:" in exec_a.llm.stop + assert "StopA:" in exec_a.llm.stop + assert "StopB:" not in exec_a.llm.stop + + # exec_b should have Original: + StopB: + assert "Original:" in exec_b.llm.stop + assert "StopB:" in exec_b.llm.stop + assert "StopA:" not in exec_b.llm.stop + + def test_executor_no_copy_when_no_new_stop_words(self, shared_llm, base_deps): + """Executor should not copy LLM when stop_words don't add anything new.""" + # The shared LLM already has ["Original:"] + agent = Mock() + agent.id = "agent-1" + agent.role = "Agent 1" + agent.verbose = False + agent.key = "key-1" + + executor = AgentExecutor( + **base_deps, + agent=agent, + stop_words=[], # No new stop words + ) + + # When no new stop words are added, the LLM should not be copied + assert executor.llm is shared_llm + + def test_stop_words_persist_across_multiple_kickoffs(self, shared_llm, base_deps): + """Stop words should not accumulate across multiple executor creations + (simulating multiple crew.kickoff() calls).""" + original_stop = list(shared_llm.stop) + + for i in range(5): + agent = Mock() + agent.id = f"agent-{i}" + agent.role = f"Agent {i}" + agent.verbose = False + agent.key = f"key-{i}" + + AgentExecutor( + **base_deps, + agent=agent, + stop_words=["Observation:"], + ) + + # After 5 executor creations, the shared LLM's stop words must be unchanged + assert shared_llm.stop == original_stop