Compare commits

...

1 Commits

Author SHA1 Message Date
Devin AI
1459e6532c fix: prevent LLM stop words mutation causing output truncation in crew.kickoff()
The CrewAgentExecutor.__init__ was permanently mutating the shared LLM
object's stop attribute by adding executor-specific stop words (e.g.
'\nObservation:'). This caused BaseLLM._apply_stop_words() to truncate
ALL subsequent LLM responses at those stop words, even when stop words
were not intended for that call.

The fix:
- Stop words are now set on the LLM only for the duration of each
  invoke/ainvoke call and restored afterward via _set_llm_stop_words()
  and _restore_llm_stop_words() methods.
- _update_executor_parameters in Agent.core no longer mutates the
  shared LLM's stop attribute.

Fixes #4603

Co-Authored-By: João <joao@crewai.com>
2026-02-26 05:18:21 +00:00
3 changed files with 532 additions and 16 deletions

View File

@@ -906,14 +906,14 @@ class Agent(BaseAgent):
self.agent_executor.tools_handler = self.tools_handler
self.agent_executor.request_within_rpm_limit = rpm_limit_fn
# Update the executor's saved original stop words so that
# _set_llm_stop_words / _restore_llm_stop_words work correctly.
# We intentionally do NOT mutate self.agent_executor.llm.stop here
# to avoid polluting the shared LLM object across executor lifecycles
# (see https://github.com/crewAIInc/crewAI/issues/4603).
if self.agent_executor.llm:
existing_stop = getattr(self.agent_executor.llm, "stop", [])
self.agent_executor.llm.stop = list(
set(
existing_stop + stop_words
if isinstance(existing_stop, list)
else stop_words
)
self.agent_executor._original_llm_stop = list(
getattr(self.agent_executor.llm, "stop", []) or []
)
def get_delegation_tools(self, agents: list[BaseAgent]) -> list[BaseTool]:

View File

@@ -160,16 +160,13 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
self.after_llm_call_hooks: list[Callable[..., Any]] = []
self.before_llm_call_hooks.extend(get_before_llm_call_hooks())
self.after_llm_call_hooks.extend(get_after_llm_call_hooks())
# Store the LLM's original stop words so we can restore them after execution.
# We must NOT mutate the shared LLM object's stop attribute because it persists
# across executor lifecycles and causes output truncation in subsequent calls
# (see https://github.com/crewAIInc/crewAI/issues/4603).
self._original_llm_stop: list[str] | None = None
if self.llm:
# This may be mutating the shared llm object and needs further evaluation
existing_stop = getattr(self.llm, "stop", [])
self.llm.stop = list(
set(
existing_stop + self.stop
if isinstance(existing_stop, list)
else self.stop
)
)
self._original_llm_stop = list(getattr(self.llm, "stop", []) or [])
@property
def use_stop_words(self) -> bool:
@@ -205,6 +202,30 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
provider.post_setup_messages(cast(ExecutorContext, cast(object, self)))
def _set_llm_stop_words(self) -> None:
"""Temporarily set stop words on the LLM for this execution.
Merges the executor's stop words with the LLM's original stop words
for the duration of the execution. Must be paired with
_restore_llm_stop_words to avoid polluting the shared LLM object.
"""
if self.llm and self.stop:
existing_stop = getattr(self.llm, "stop", []) or []
if isinstance(existing_stop, list):
merged = list(set(existing_stop + self.stop))
else:
merged = list(set(self.stop))
self.llm.stop = merged
def _restore_llm_stop_words(self) -> None:
"""Restore the LLM's original stop words after execution.
This ensures the shared LLM object is not polluted with executor-specific
stop words that would cause output truncation in subsequent calls.
"""
if self.llm and self._original_llm_stop is not None:
self.llm.stop = list(self._original_llm_stop)
def invoke(self, inputs: dict[str, Any]) -> dict[str, Any]:
"""Execute the agent with given inputs.
@@ -222,6 +243,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
self.ask_for_human_input = bool(inputs.get("ask_for_human_input", False))
self._set_llm_stop_words()
try:
formatted_answer = self._invoke_loop()
except AssertionError:
@@ -234,6 +256,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
except Exception as e:
handle_unknown_error(self._printer, e, verbose=self.agent.verbose)
raise
finally:
self._restore_llm_stop_words()
if self.ask_for_human_input:
formatted_answer = self._handle_human_feedback(formatted_answer)
@@ -1119,6 +1143,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
self.ask_for_human_input = bool(inputs.get("ask_for_human_input", False))
self._set_llm_stop_words()
try:
formatted_answer = await self._ainvoke_loop()
except AssertionError:
@@ -1131,6 +1156,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
except Exception as e:
handle_unknown_error(self._printer, e, verbose=self.agent.verbose)
raise
finally:
self._restore_llm_stop_words()
if self.ask_for_human_input:
formatted_answer = await self._ahandle_human_feedback(formatted_answer)

View File

@@ -0,0 +1,489 @@
"""Tests for LLM stop words mutation fix (issue #4603).
Verifies that CrewAgentExecutor does not permanently mutate the shared LLM
object's stop words, which caused output truncation in crew.kickoff() when
the same LLM was reused across multiple executor lifecycles.
"""
from unittest.mock import MagicMock, Mock, patch
import pytest
from crewai.agents.crew_agent_executor import CrewAgentExecutor
from crewai.agents.parser import AgentFinish
@pytest.fixture
def mock_llm():
    """Mock LLM exposing a mutable ``stop`` list plus stop-word capability flags."""
    fake = MagicMock()
    fake.stop = []
    fake.supports_stop_words.return_value = True
    fake.supports_function_calling.return_value = False
    return fake
@pytest.fixture
def mock_agent():
    """Mock agent carrying the attributes the executor reads during setup."""
    fake = MagicMock()
    fake.id = "test-agent"
    fake.role = "Test Agent"
    fake.verbose = False
    fake.key = "test-key"
    fake.security_config = None
    return fake
@pytest.fixture
def mock_task():
    """Mock task with the attributes the executor inspects."""
    fake = MagicMock()
    fake.description = "Test task"
    fake.human_input = False
    fake.response_model = None
    return fake
@pytest.fixture
def mock_crew():
    """Mock crew with training and memory disabled."""
    fake = MagicMock()
    fake.verbose = False
    fake._train = False
    fake._memory = None
    return fake
@pytest.fixture
def executor_kwargs(mock_llm, mock_agent, mock_task, mock_crew):
    """Baseline keyword arguments for constructing a CrewAgentExecutor."""
    return dict(
        llm=mock_llm,
        task=mock_task,
        agent=mock_agent,
        crew=mock_crew,
        prompt={"prompt": "Test {input} {tool_names} {tools}"},
        max_iter=10,
        tools=[],
        tools_names="",
        stop_words=["\nObservation:"],
        tools_description="",
        tools_handler=MagicMock(),
        original_tools=[],
    )
class TestStopWordsMutationFix:
    """The executor must never permanently mutate the shared LLM's stop words."""

    def test_executor_init_does_not_mutate_llm_stop(self, executor_kwargs, mock_llm):
        """Constructing an executor leaves the LLM's stop attribute untouched."""
        before = list(mock_llm.stop)
        CrewAgentExecutor(**executor_kwargs)
        assert mock_llm.stop == before

    def test_executor_saves_original_llm_stop(self, executor_kwargs, mock_llm):
        """The constructor snapshots the LLM's pre-existing stop words."""
        mock_llm.stop = ["existing_stop"]
        executor = CrewAgentExecutor(**executor_kwargs)
        assert executor._original_llm_stop == ["existing_stop"]

    def test_executor_saves_empty_original_stop(self, executor_kwargs, mock_llm):
        """An empty stop list on the LLM is snapshotted as an empty list."""
        mock_llm.stop = []
        executor = CrewAgentExecutor(**executor_kwargs)
        assert executor._original_llm_stop == []

    def test_set_llm_stop_words_merges_correctly(self, executor_kwargs, mock_llm):
        """_set_llm_stop_words unions executor stop words with the LLM's own."""
        mock_llm.stop = ["existing"]
        executor = CrewAgentExecutor(**executor_kwargs)
        executor._set_llm_stop_words()
        assert set(mock_llm.stop) == {"existing", "\nObservation:"}

    def test_restore_llm_stop_words(self, executor_kwargs, mock_llm):
        """_restore_llm_stop_words reinstates the snapshotted stop words."""
        mock_llm.stop = ["original_stop"]
        executor = CrewAgentExecutor(**executor_kwargs)
        # Drive the same set/restore cycle an execution would perform.
        executor._set_llm_stop_words()
        assert "\nObservation:" in mock_llm.stop
        executor._restore_llm_stop_words()
        assert mock_llm.stop == ["original_stop"]

    def test_invoke_restores_stop_words_after_success(self, executor_kwargs, mock_llm):
        """invoke() restores the LLM's stop words on the success path."""
        mock_llm.stop = []
        executor = CrewAgentExecutor(**executor_kwargs)
        finish = AgentFinish(thought="", output="done", text="done")
        with (
            patch.object(executor, "_invoke_loop", return_value=finish),
            patch.object(executor, "_setup_messages"),
            patch.object(executor, "_inject_multimodal_files"),
            patch.object(executor, "_show_start_logs"),
            patch.object(executor, "_save_to_memory"),
        ):
            executor.invoke({"input": "test", "tool_names": "", "tools": ""})
        # Once invoke returns, the shared LLM must be back to its original state.
        assert mock_llm.stop == []

    def test_invoke_restores_stop_words_after_exception(self, executor_kwargs, mock_llm):
        """invoke() restores stop words even when the invoke loop raises."""
        mock_llm.stop = []
        executor = CrewAgentExecutor(**executor_kwargs)
        with (
            patch.object(executor, "_invoke_loop", side_effect=RuntimeError("boom")),
            patch.object(executor, "_setup_messages"),
            patch.object(executor, "_inject_multimodal_files"),
            patch.object(executor, "_show_start_logs"),
        ):
            with pytest.raises(RuntimeError, match="boom"):
                executor.invoke({"input": "test", "tool_names": "", "tools": ""})
        # The finally-block restore must run on the error path too.
        assert mock_llm.stop == []

    @pytest.mark.asyncio
    async def test_ainvoke_restores_stop_words_after_success(self, executor_kwargs, mock_llm):
        """ainvoke() restores the LLM's stop words on the success path."""
        mock_llm.stop = []
        executor = CrewAgentExecutor(**executor_kwargs)
        finish = AgentFinish(thought="", output="done", text="done")
        with (
            patch.object(executor, "_ainvoke_loop", return_value=finish),
            patch.object(executor, "_setup_messages"),
            patch.object(executor, "_ainject_multimodal_files"),
            patch.object(executor, "_show_start_logs"),
            patch.object(executor, "_save_to_memory"),
        ):
            await executor.ainvoke({"input": "test", "tool_names": "", "tools": ""})
        assert mock_llm.stop == []

    @pytest.mark.asyncio
    async def test_ainvoke_restores_stop_words_after_exception(self, executor_kwargs, mock_llm):
        """ainvoke() restores stop words even when the async loop raises."""
        mock_llm.stop = []
        executor = CrewAgentExecutor(**executor_kwargs)
        # patch.object auto-detects the async target and uses AsyncMock, so an
        # exception instance as side_effect is raised when the mock is awaited.
        with (
            patch.object(executor, "_ainvoke_loop", side_effect=RuntimeError("async boom")),
            patch.object(executor, "_setup_messages"),
            patch.object(executor, "_ainject_multimodal_files"),
            patch.object(executor, "_show_start_logs"),
        ):
            with pytest.raises(RuntimeError, match="async boom"):
                await executor.ainvoke({"input": "test", "tool_names": "", "tools": ""})
        assert mock_llm.stop == []
class TestSharedLLMNotPolluted:
    """A shared LLM object must stay clean across multiple executor instances."""

    @staticmethod
    def _patched_run(executor, loop_result):
        """Run invoke() with the surrounding executor machinery stubbed out."""
        with (
            patch.object(executor, "_invoke_loop", return_value=loop_result),
            patch.object(executor, "_setup_messages"),
            patch.object(executor, "_inject_multimodal_files"),
            patch.object(executor, "_show_start_logs"),
            patch.object(executor, "_save_to_memory"),
        ):
            return executor.invoke({"input": "test", "tool_names": "", "tools": ""})

    def test_multiple_executors_do_not_accumulate_stop_words(
        self, executor_kwargs, mock_llm
    ):
        """Two back-to-back executors leave no stop words behind on the LLM."""
        mock_llm.stop = []

        first = CrewAgentExecutor(**executor_kwargs)
        self._patched_run(first, AgentFinish(thought="", output="done", text="done"))
        # The LLM must be clean once the first executor finishes.
        assert mock_llm.stop == []

        second = CrewAgentExecutor(**executor_kwargs)
        self._patched_run(second, AgentFinish(thought="", output="done2", text="done2"))
        # And still clean after the second one.
        assert mock_llm.stop == []

    def test_llm_stop_words_only_set_during_execution(
        self, executor_kwargs, mock_llm
    ):
        """Stop words appear on the LLM only while invoke() is actually running."""
        mock_llm.stop = []
        executor = CrewAgentExecutor(**executor_kwargs)
        seen = []

        def record_and_finish():
            # Snapshot the LLM's stop words as the loop sees them mid-execution.
            seen.append(list(mock_llm.stop))
            return AgentFinish(thought="", output="done", text="done")

        with (
            patch.object(executor, "_invoke_loop", side_effect=record_and_finish),
            patch.object(executor, "_setup_messages"),
            patch.object(executor, "_inject_multimodal_files"),
            patch.object(executor, "_show_start_logs"),
            patch.object(executor, "_save_to_memory"),
        ):
            executor.invoke({"input": "test", "tool_names": "", "tools": ""})

        # Stop words were present during execution...
        assert len(seen) == 1
        assert "\nObservation:" in seen[0]
        # ...and gone again afterwards.
        assert mock_llm.stop == []

    def test_user_configured_stop_words_preserved(self, executor_kwargs, mock_llm):
        """User-set stop words on the LLM survive an execution unchanged."""
        mock_llm.stop = ["UserStop1", "UserStop2"]
        executor = CrewAgentExecutor(**executor_kwargs)
        self._patched_run(executor, AgentFinish(thought="", output="done", text="done"))
        assert mock_llm.stop == ["UserStop1", "UserStop2"]
class TestUpdateExecutorParameters:
    """_update_executor_parameters must not mutate the shared LLM's stop words."""

    def test_update_parameters_does_not_mutate_llm_stop(
        self, executor_kwargs, mock_llm
    ):
        """Refreshing executor parameters leaves the LLM's stop attribute alone."""
        mock_llm.stop = []
        executor = CrewAgentExecutor(**executor_kwargs)
        # Mirror the assignments Agent.create_agent_executor performs on update.
        executor.task = executor_kwargs["task"]
        executor.tools = []
        executor.original_tools = []
        executor.prompt = executor_kwargs["prompt"]
        executor.stop = ["\nObservation:"]
        executor.tools_names = ""
        executor.tools_description = ""
        # The fix only refreshes the saved snapshot, never llm.stop itself.
        executor._original_llm_stop = list(getattr(mock_llm, "stop", []) or [])
        assert mock_llm.stop == []

    def test_sequential_crew_executions_no_stop_word_leak(self, mock_llm):
        """Core reproduction of issue #4603: repeated executions sharing one
        LLM (as in multiple crew.kickoff() calls) must not leak stop words
        between runs."""
        mock_llm.stop = []
        for i in range(3):
            agent = MagicMock(
                id=f"agent-{i}",
                role=f"Agent {i}",
                verbose=False,
                key=f"key-{i}",
                security_config=None,
            )
            task = MagicMock(
                description=f"Task {i}", human_input=False, response_model=None
            )
            crew = MagicMock(verbose=False)
            crew._train = False
            crew._memory = None
            executor = CrewAgentExecutor(
                llm=mock_llm,
                task=task,
                agent=agent,
                crew=crew,
                prompt={"prompt": "Test {input} {tool_names} {tools}"},
                max_iter=10,
                tools=[],
                tools_names="",
                stop_words=["\nObservation:"],
                tools_description="",
                tools_handler=MagicMock(),
                original_tools=[],
            )
            finish = AgentFinish(
                thought="", output=f"result-{i}", text=f"result-{i}"
            )
            with (
                patch.object(executor, "_invoke_loop", return_value=finish),
                patch.object(executor, "_setup_messages"),
                patch.object(executor, "_inject_multimodal_files"),
                patch.object(executor, "_show_start_logs"),
                patch.object(executor, "_save_to_memory"),
            ):
                result = executor.invoke(
                    {"input": "test", "tool_names": "", "tools": ""}
                )
            assert result["output"] == f"result-{i}"
            # Each run must leave the shared LLM's stop words untouched.
            assert mock_llm.stop == [], (
                f"LLM stop words leaked after execution {i}: {mock_llm.stop}"
            )
class TestApplyStopWordsInteraction:
    """After executor cleanup, _apply_stop_words-style truncation is a no-op."""

    def test_apply_stop_words_not_triggered_after_restore(self, mock_llm):
        """A later response containing 'Observation:' must not be truncated
        once the executor has restored the LLM's (empty) stop words."""
        mock_llm.stop = []
        agent = MagicMock(
            id="test-agent",
            role="Test",
            verbose=False,
            key="key",
            security_config=None,
        )
        task = MagicMock(description="Test", human_input=False, response_model=None)
        crew = MagicMock(verbose=False)
        crew._train = False
        crew._memory = None
        executor = CrewAgentExecutor(
            llm=mock_llm,
            task=task,
            agent=agent,
            crew=crew,
            prompt={"prompt": "Test {input} {tool_names} {tools}"},
            max_iter=10,
            tools=[],
            tools_names="",
            stop_words=["\nObservation:"],
            tools_description="",
            tools_handler=MagicMock(),
            original_tools=[],
        )
        # Run one full invoke cycle so the restore path executes.
        finish = AgentFinish(thought="", output="done", text="done")
        with (
            patch.object(executor, "_invoke_loop", return_value=finish),
            patch.object(executor, "_setup_messages"),
            patch.object(executor, "_inject_multimodal_files"),
            patch.object(executor, "_show_start_logs"),
            patch.object(executor, "_save_to_memory"),
        ):
            executor.invoke({"input": "test", "tool_names": "", "tools": ""})

        # Simulate a subsequent, unrelated LLM call (e.g. from another crew)
        # whose response contains "\nObservation:" and must NOT be truncated.
        long_response = (
            "```json\n"
            '{"analysis": "This is a detailed analysis with many findings. '
            "The data shows significant trends across all metrics. "
            "We observed multiple patterns including seasonal variations.\n"
            'Observation: The key finding is that performance improved by 25%."}\n'
            "```"
        )
        # With the fix in place the LLM's stop words are empty again.
        assert mock_llm.stop == []
        # Re-create conceptually what BaseLLM._apply_stop_words would do.
        truncated = long_response
        for stop_word in mock_llm.stop or []:
            idx = truncated.find(stop_word)
            if idx != -1:
                truncated = truncated[:idx].strip()
        # The full response survives untruncated.
        assert truncated == long_response
        assert len(truncated) > 200  # still the complete response
class TestEdgeCases:
    """Edge cases around empty, None, and missing stop-word configuration."""

    def test_executor_with_no_stop_words(self, executor_kwargs, mock_llm):
        """With no executor stop words, set/restore are both no-ops."""
        executor_kwargs["stop_words"] = []
        mock_llm.stop = []
        executor = CrewAgentExecutor(**executor_kwargs)
        executor._set_llm_stop_words()
        assert mock_llm.stop == []
        executor._restore_llm_stop_words()
        assert mock_llm.stop == []

    def test_executor_with_none_llm_stop(self, executor_kwargs, mock_llm):
        """A None stop attribute on the LLM is treated as an empty list."""
        mock_llm.stop = None
        executor = CrewAgentExecutor(**executor_kwargs)
        assert executor._original_llm_stop == []
        executor._set_llm_stop_words()
        assert "\nObservation:" in mock_llm.stop
        executor._restore_llm_stop_words()
        assert mock_llm.stop == []

    def test_executor_with_no_llm(self, executor_kwargs):
        """A missing LLM is handled gracefully by both helpers."""
        executor_kwargs["llm"] = None
        executor = CrewAgentExecutor(**executor_kwargs)
        assert executor._original_llm_stop is None
        # Neither helper may raise when there is no LLM at all.
        executor._set_llm_stop_words()
        executor._restore_llm_stop_words()