Compare commits

...

1 Commits

Author SHA1 Message Date
Devin AI
1459e6532c fix: prevent LLM stop words mutation causing output truncation in crew.kickoff()
The CrewAgentExecutor.__init__ was permanently mutating the shared LLM
object's stop attribute by adding executor-specific stop words (e.g.
'\nObservation:'). This caused BaseLLM._apply_stop_words() to truncate
ALL subsequent LLM responses at those stop words, even when stop words
were not intended for that call.

The fix:
- Stop words are now set on the LLM only for the duration of each
  invoke/ainvoke call and restored afterward via _set_llm_stop_words()
  and _restore_llm_stop_words() methods.
- _update_executor_parameters in Agent.core no longer mutates the
  shared LLM's stop attribute.

Fixes #4603

Co-Authored-By: João <joao@crewai.com>
2026-02-26 05:18:21 +00:00
3 changed files with 532 additions and 16 deletions

View File

@@ -906,14 +906,14 @@ class Agent(BaseAgent):
self.agent_executor.tools_handler = self.tools_handler
self.agent_executor.request_within_rpm_limit = rpm_limit_fn
# Update the executor's saved original stop words so that
# _set_llm_stop_words / _restore_llm_stop_words work correctly.
# We intentionally do NOT mutate self.agent_executor.llm.stop here
# to avoid polluting the shared LLM object across executor lifecycles
# (see https://github.com/crewAIInc/crewAI/issues/4603).
if self.agent_executor.llm:
existing_stop = getattr(self.agent_executor.llm, "stop", [])
self.agent_executor.llm.stop = list(
set(
existing_stop + stop_words
if isinstance(existing_stop, list)
else stop_words
)
self.agent_executor._original_llm_stop = list(
getattr(self.agent_executor.llm, "stop", []) or []
)
def get_delegation_tools(self, agents: list[BaseAgent]) -> list[BaseTool]:

View File

@@ -160,16 +160,13 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
self.after_llm_call_hooks: list[Callable[..., Any]] = []
self.before_llm_call_hooks.extend(get_before_llm_call_hooks())
self.after_llm_call_hooks.extend(get_after_llm_call_hooks())
# Store the LLM's original stop words so we can restore them after execution.
# We must NOT mutate the shared LLM object's stop attribute because it persists
# across executor lifecycles and causes output truncation in subsequent calls
# (see https://github.com/crewAIInc/crewAI/issues/4603).
self._original_llm_stop: list[str] | None = None
if self.llm:
# This may be mutating the shared llm object and needs further evaluation
existing_stop = getattr(self.llm, "stop", [])
self.llm.stop = list(
set(
existing_stop + self.stop
if isinstance(existing_stop, list)
else self.stop
)
)
self._original_llm_stop = list(getattr(self.llm, "stop", []) or [])
@property
def use_stop_words(self) -> bool:
@@ -205,6 +202,30 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
provider.post_setup_messages(cast(ExecutorContext, cast(object, self)))
def _set_llm_stop_words(self) -> None:
"""Temporarily set stop words on the LLM for this execution.
Merges the executor's stop words with the LLM's original stop words
for the duration of the execution. Must be paired with
_restore_llm_stop_words to avoid polluting the shared LLM object.
"""
if self.llm and self.stop:
existing_stop = getattr(self.llm, "stop", []) or []
if isinstance(existing_stop, list):
merged = list(set(existing_stop + self.stop))
else:
merged = list(set(self.stop))
self.llm.stop = merged
def _restore_llm_stop_words(self) -> None:
"""Restore the LLM's original stop words after execution.
This ensures the shared LLM object is not polluted with executor-specific
stop words that would cause output truncation in subsequent calls.
"""
if self.llm and self._original_llm_stop is not None:
self.llm.stop = list(self._original_llm_stop)
def invoke(self, inputs: dict[str, Any]) -> dict[str, Any]:
"""Execute the agent with given inputs.
@@ -222,6 +243,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
self.ask_for_human_input = bool(inputs.get("ask_for_human_input", False))
self._set_llm_stop_words()
try:
formatted_answer = self._invoke_loop()
except AssertionError:
@@ -234,6 +256,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
except Exception as e:
handle_unknown_error(self._printer, e, verbose=self.agent.verbose)
raise
finally:
self._restore_llm_stop_words()
if self.ask_for_human_input:
formatted_answer = self._handle_human_feedback(formatted_answer)
@@ -1119,6 +1143,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
self.ask_for_human_input = bool(inputs.get("ask_for_human_input", False))
self._set_llm_stop_words()
try:
formatted_answer = await self._ainvoke_loop()
except AssertionError:
@@ -1131,6 +1156,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
except Exception as e:
handle_unknown_error(self._printer, e, verbose=self.agent.verbose)
raise
finally:
self._restore_llm_stop_words()
if self.ask_for_human_input:
formatted_answer = await self._ahandle_human_feedback(formatted_answer)

View File

@@ -0,0 +1,489 @@
"""Tests for LLM stop words mutation fix (issue #4603).
Verifies that CrewAgentExecutor does not permanently mutate the shared LLM
object's stop words, which caused output truncation in crew.kickoff() when
the same LLM was reused across multiple executor lifecycles.
"""
from unittest.mock import MagicMock, Mock, patch
import pytest
from crewai.agents.crew_agent_executor import CrewAgentExecutor
from crewai.agents.parser import AgentFinish
@pytest.fixture
def mock_llm():
    """Mock LLM exposing a mutable ``stop`` list plus stop-word capability flags."""
    fake = MagicMock()
    fake.stop = []
    fake.supports_stop_words.return_value = True
    fake.supports_function_calling.return_value = False
    return fake
@pytest.fixture
def mock_agent():
    """Mock agent carrying the attributes the executor reads during setup."""
    fake = MagicMock()
    fake.id = "test-agent"
    fake.role = "Test Agent"
    fake.verbose = False
    fake.key = "test-key"
    fake.security_config = None
    return fake
@pytest.fixture
def mock_task():
    """Mock task with the attributes the executor inspects."""
    fake = MagicMock()
    fake.description = "Test task"
    fake.human_input = False
    fake.response_model = None
    return fake
@pytest.fixture
def mock_crew():
    """Mock crew with training and memory disabled."""
    fake = MagicMock()
    fake.verbose = False
    fake._train = False
    fake._memory = None
    return fake
@pytest.fixture
def executor_kwargs(mock_llm, mock_agent, mock_task, mock_crew):
    """Baseline keyword arguments for constructing a CrewAgentExecutor."""
    return dict(
        llm=mock_llm,
        task=mock_task,
        agent=mock_agent,
        crew=mock_crew,
        prompt={"prompt": "Test {input} {tool_names} {tools}"},
        max_iter=10,
        tools=[],
        tools_names="",
        stop_words=["\nObservation:"],
        tools_description="",
        tools_handler=MagicMock(),
        original_tools=[],
    )
class TestStopWordsMutationFix:
    """The executor must never permanently mutate the shared LLM's stop words."""

    def test_executor_init_does_not_mutate_llm_stop(self, executor_kwargs, mock_llm):
        """Constructing an executor leaves the LLM's stop attribute untouched."""
        before = list(mock_llm.stop)
        CrewAgentExecutor(**executor_kwargs)
        assert mock_llm.stop == before

    def test_executor_saves_original_llm_stop(self, executor_kwargs, mock_llm):
        """The constructor snapshots the LLM's pre-existing stop words."""
        mock_llm.stop = ["existing_stop"]
        executor = CrewAgentExecutor(**executor_kwargs)
        assert executor._original_llm_stop == ["existing_stop"]

    def test_executor_saves_empty_original_stop(self, executor_kwargs, mock_llm):
        """An empty stop list on the LLM is snapshotted as an empty list."""
        mock_llm.stop = []
        executor = CrewAgentExecutor(**executor_kwargs)
        assert executor._original_llm_stop == []

    def test_set_llm_stop_words_merges_correctly(self, executor_kwargs, mock_llm):
        """_set_llm_stop_words unions executor stop words with the LLM's own."""
        mock_llm.stop = ["existing"]
        executor = CrewAgentExecutor(**executor_kwargs)
        executor._set_llm_stop_words()
        assert set(mock_llm.stop) == {"existing", "\nObservation:"}

    def test_restore_llm_stop_words(self, executor_kwargs, mock_llm):
        """_restore_llm_stop_words reinstates the snapshotted stop words."""
        mock_llm.stop = ["original_stop"]
        executor = CrewAgentExecutor(**executor_kwargs)
        # Drive the same set/restore cycle an execution would perform.
        executor._set_llm_stop_words()
        assert "\nObservation:" in mock_llm.stop
        executor._restore_llm_stop_words()
        assert mock_llm.stop == ["original_stop"]

    def test_invoke_restores_stop_words_after_success(self, executor_kwargs, mock_llm):
        """invoke() restores the LLM's stop words on the success path."""
        mock_llm.stop = []
        executor = CrewAgentExecutor(**executor_kwargs)
        finish = AgentFinish(thought="", output="done", text="done")
        with (
            patch.object(executor, "_invoke_loop", return_value=finish),
            patch.object(executor, "_setup_messages"),
            patch.object(executor, "_inject_multimodal_files"),
            patch.object(executor, "_show_start_logs"),
            patch.object(executor, "_save_to_memory"),
        ):
            executor.invoke({"input": "test", "tool_names": "", "tools": ""})
        # Once invoke returns, the shared LLM must be back to its original state.
        assert mock_llm.stop == []

    def test_invoke_restores_stop_words_after_exception(self, executor_kwargs, mock_llm):
        """invoke() restores stop words even when the invoke loop raises."""
        mock_llm.stop = []
        executor = CrewAgentExecutor(**executor_kwargs)
        with (
            patch.object(executor, "_invoke_loop", side_effect=RuntimeError("boom")),
            patch.object(executor, "_setup_messages"),
            patch.object(executor, "_inject_multimodal_files"),
            patch.object(executor, "_show_start_logs"),
        ):
            with pytest.raises(RuntimeError, match="boom"):
                executor.invoke({"input": "test", "tool_names": "", "tools": ""})
        # The finally-block restore must run on the error path too.
        assert mock_llm.stop == []

    @pytest.mark.asyncio
    async def test_ainvoke_restores_stop_words_after_success(self, executor_kwargs, mock_llm):
        """ainvoke() restores the LLM's stop words on the success path."""
        mock_llm.stop = []
        executor = CrewAgentExecutor(**executor_kwargs)
        finish = AgentFinish(thought="", output="done", text="done")
        with (
            patch.object(executor, "_ainvoke_loop", return_value=finish),
            patch.object(executor, "_setup_messages"),
            patch.object(executor, "_ainject_multimodal_files"),
            patch.object(executor, "_show_start_logs"),
            patch.object(executor, "_save_to_memory"),
        ):
            await executor.ainvoke({"input": "test", "tool_names": "", "tools": ""})
        assert mock_llm.stop == []

    @pytest.mark.asyncio
    async def test_ainvoke_restores_stop_words_after_exception(self, executor_kwargs, mock_llm):
        """ainvoke() restores stop words even when the async loop raises."""
        mock_llm.stop = []
        executor = CrewAgentExecutor(**executor_kwargs)
        # patch.object auto-detects the async target and uses AsyncMock, so an
        # exception instance as side_effect is raised when the mock is awaited.
        with (
            patch.object(executor, "_ainvoke_loop", side_effect=RuntimeError("async boom")),
            patch.object(executor, "_setup_messages"),
            patch.object(executor, "_ainject_multimodal_files"),
            patch.object(executor, "_show_start_logs"),
        ):
            with pytest.raises(RuntimeError, match="async boom"):
                await executor.ainvoke({"input": "test", "tool_names": "", "tools": ""})
        assert mock_llm.stop == []
class TestSharedLLMNotPolluted:
    """A shared LLM object must stay clean across multiple executor instances."""

    @staticmethod
    def _patched_run(executor, loop_result):
        """Run invoke() with the surrounding executor machinery stubbed out."""
        with (
            patch.object(executor, "_invoke_loop", return_value=loop_result),
            patch.object(executor, "_setup_messages"),
            patch.object(executor, "_inject_multimodal_files"),
            patch.object(executor, "_show_start_logs"),
            patch.object(executor, "_save_to_memory"),
        ):
            return executor.invoke({"input": "test", "tool_names": "", "tools": ""})

    def test_multiple_executors_do_not_accumulate_stop_words(
        self, executor_kwargs, mock_llm
    ):
        """Two back-to-back executors leave no stop words behind on the LLM."""
        mock_llm.stop = []

        first = CrewAgentExecutor(**executor_kwargs)
        self._patched_run(first, AgentFinish(thought="", output="done", text="done"))
        # The LLM must be clean once the first executor finishes.
        assert mock_llm.stop == []

        second = CrewAgentExecutor(**executor_kwargs)
        self._patched_run(second, AgentFinish(thought="", output="done2", text="done2"))
        # And still clean after the second one.
        assert mock_llm.stop == []

    def test_llm_stop_words_only_set_during_execution(
        self, executor_kwargs, mock_llm
    ):
        """Stop words appear on the LLM only while invoke() is actually running."""
        mock_llm.stop = []
        executor = CrewAgentExecutor(**executor_kwargs)
        seen = []

        def record_and_finish():
            # Snapshot the LLM's stop words as the loop sees them mid-execution.
            seen.append(list(mock_llm.stop))
            return AgentFinish(thought="", output="done", text="done")

        with (
            patch.object(executor, "_invoke_loop", side_effect=record_and_finish),
            patch.object(executor, "_setup_messages"),
            patch.object(executor, "_inject_multimodal_files"),
            patch.object(executor, "_show_start_logs"),
            patch.object(executor, "_save_to_memory"),
        ):
            executor.invoke({"input": "test", "tool_names": "", "tools": ""})

        # Stop words were present during execution...
        assert len(seen) == 1
        assert "\nObservation:" in seen[0]
        # ...and gone again afterwards.
        assert mock_llm.stop == []

    def test_user_configured_stop_words_preserved(self, executor_kwargs, mock_llm):
        """User-set stop words on the LLM survive an execution unchanged."""
        mock_llm.stop = ["UserStop1", "UserStop2"]
        executor = CrewAgentExecutor(**executor_kwargs)
        self._patched_run(executor, AgentFinish(thought="", output="done", text="done"))
        assert mock_llm.stop == ["UserStop1", "UserStop2"]
class TestUpdateExecutorParameters:
    """_update_executor_parameters must not mutate the shared LLM's stop words."""

    def test_update_parameters_does_not_mutate_llm_stop(
        self, executor_kwargs, mock_llm
    ):
        """Refreshing executor parameters leaves the LLM's stop attribute alone."""
        mock_llm.stop = []
        executor = CrewAgentExecutor(**executor_kwargs)
        # Mirror the assignments Agent.create_agent_executor performs on update.
        executor.task = executor_kwargs["task"]
        executor.tools = []
        executor.original_tools = []
        executor.prompt = executor_kwargs["prompt"]
        executor.stop = ["\nObservation:"]
        executor.tools_names = ""
        executor.tools_description = ""
        # The fix only refreshes the saved snapshot, never llm.stop itself.
        executor._original_llm_stop = list(getattr(mock_llm, "stop", []) or [])
        assert mock_llm.stop == []

    def test_sequential_crew_executions_no_stop_word_leak(self, mock_llm):
        """Core reproduction of issue #4603: repeated executions sharing one
        LLM (as in multiple crew.kickoff() calls) must not leak stop words
        between runs."""
        mock_llm.stop = []
        for i in range(3):
            agent = MagicMock(
                id=f"agent-{i}",
                role=f"Agent {i}",
                verbose=False,
                key=f"key-{i}",
                security_config=None,
            )
            task = MagicMock(
                description=f"Task {i}", human_input=False, response_model=None
            )
            crew = MagicMock(verbose=False)
            crew._train = False
            crew._memory = None
            executor = CrewAgentExecutor(
                llm=mock_llm,
                task=task,
                agent=agent,
                crew=crew,
                prompt={"prompt": "Test {input} {tool_names} {tools}"},
                max_iter=10,
                tools=[],
                tools_names="",
                stop_words=["\nObservation:"],
                tools_description="",
                tools_handler=MagicMock(),
                original_tools=[],
            )
            finish = AgentFinish(
                thought="", output=f"result-{i}", text=f"result-{i}"
            )
            with (
                patch.object(executor, "_invoke_loop", return_value=finish),
                patch.object(executor, "_setup_messages"),
                patch.object(executor, "_inject_multimodal_files"),
                patch.object(executor, "_show_start_logs"),
                patch.object(executor, "_save_to_memory"),
            ):
                result = executor.invoke(
                    {"input": "test", "tool_names": "", "tools": ""}
                )
            assert result["output"] == f"result-{i}"
            # Each run must leave the shared LLM's stop words untouched.
            assert mock_llm.stop == [], (
                f"LLM stop words leaked after execution {i}: {mock_llm.stop}"
            )
class TestApplyStopWordsInteraction:
    """After executor cleanup, _apply_stop_words-style truncation is a no-op."""

    def test_apply_stop_words_not_triggered_after_restore(self, mock_llm):
        """A later response containing 'Observation:' must not be truncated
        once the executor has restored the LLM's (empty) stop words."""
        mock_llm.stop = []
        agent = MagicMock(
            id="test-agent",
            role="Test",
            verbose=False,
            key="key",
            security_config=None,
        )
        task = MagicMock(description="Test", human_input=False, response_model=None)
        crew = MagicMock(verbose=False)
        crew._train = False
        crew._memory = None
        executor = CrewAgentExecutor(
            llm=mock_llm,
            task=task,
            agent=agent,
            crew=crew,
            prompt={"prompt": "Test {input} {tool_names} {tools}"},
            max_iter=10,
            tools=[],
            tools_names="",
            stop_words=["\nObservation:"],
            tools_description="",
            tools_handler=MagicMock(),
            original_tools=[],
        )
        # Run one full invoke cycle so the restore path executes.
        finish = AgentFinish(thought="", output="done", text="done")
        with (
            patch.object(executor, "_invoke_loop", return_value=finish),
            patch.object(executor, "_setup_messages"),
            patch.object(executor, "_inject_multimodal_files"),
            patch.object(executor, "_show_start_logs"),
            patch.object(executor, "_save_to_memory"),
        ):
            executor.invoke({"input": "test", "tool_names": "", "tools": ""})

        # Simulate a subsequent, unrelated LLM call (e.g. from another crew)
        # whose response contains "\nObservation:" and must NOT be truncated.
        long_response = (
            "```json\n"
            '{"analysis": "This is a detailed analysis with many findings. '
            "The data shows significant trends across all metrics. "
            "We observed multiple patterns including seasonal variations.\n"
            'Observation: The key finding is that performance improved by 25%."}\n'
            "```"
        )
        # With the fix in place the LLM's stop words are empty again.
        assert mock_llm.stop == []
        # Re-create conceptually what BaseLLM._apply_stop_words would do.
        truncated = long_response
        for stop_word in mock_llm.stop or []:
            idx = truncated.find(stop_word)
            if idx != -1:
                truncated = truncated[:idx].strip()
        # The full response survives untruncated.
        assert truncated == long_response
        assert len(truncated) > 200  # still the complete response
class TestEdgeCases:
    """Edge cases around empty, None, and missing stop-word configuration."""

    def test_executor_with_no_stop_words(self, executor_kwargs, mock_llm):
        """With no executor stop words, set/restore are both no-ops."""
        executor_kwargs["stop_words"] = []
        mock_llm.stop = []
        executor = CrewAgentExecutor(**executor_kwargs)
        executor._set_llm_stop_words()
        assert mock_llm.stop == []
        executor._restore_llm_stop_words()
        assert mock_llm.stop == []

    def test_executor_with_none_llm_stop(self, executor_kwargs, mock_llm):
        """A None stop attribute on the LLM is treated as an empty list."""
        mock_llm.stop = None
        executor = CrewAgentExecutor(**executor_kwargs)
        assert executor._original_llm_stop == []
        executor._set_llm_stop_words()
        assert "\nObservation:" in mock_llm.stop
        executor._restore_llm_stop_words()
        assert mock_llm.stop == []

    def test_executor_with_no_llm(self, executor_kwargs):
        """A missing LLM is handled gracefully by both helpers."""
        executor_kwargs["llm"] = None
        executor = CrewAgentExecutor(**executor_kwargs)
        assert executor._original_llm_stop is None
        # Neither helper may raise when there is no LLM at all.
        executor._set_llm_stop_words()
        executor._restore_llm_stop_words()