Compare commits


2 Commits

Author SHA1 Message Date
Devin AI
37b75aeb6a fix: handle 'Action: None' in parser to prevent OutputParserError
When LLMs output 'Action: None' (or variations like 'Action: N/A'),
the parser now correctly treats this as a signal for a direct response
instead of raising an OutputParserError.

This fixes issue #4186 where the parser would fail and leak internal
'Thought:' text to users instead of providing a clean response.

Changes:
- Add ACTION_NONE_REGEX constant to match non-action values
- Update parse() to detect and handle Action: None patterns
- Convert Action: None to AgentFinish with the thought as output
- Add comprehensive tests for all variations

Closes #4186

Co-Authored-By: João <joao@crewai.com>
2026-01-06 19:44:09 +00:00
Mike Plachta
b787d7e591 Update webhook-streaming.mdx (#4184)
2026-01-06 09:09:48 -08:00
9 changed files with 134 additions and 399 deletions

View File

@@ -55,7 +55,7 @@ Each webhook sends a list of events:
 }
 ```
-The `data` object structure varies by event type. Refer to the [event list](https://github.com/crewAIInc/crewAI/tree/main/src/crewai/utilities/events) on GitHub.
+The `data` object structure varies by event type. Refer to the [event list](https://github.com/crewAIInc/crewAI/tree/main/lib/crewai/src/crewai/events/types) on GitHub.
 As requests are sent over HTTP, the order of events can't be guaranteed. If you need ordering, use the `timestamp` field.
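For readers wiring up a consumer, a minimal sketch of the ordering advice above. The function name and payload shape are assumptions for illustration, not part of the commit; it assumes ISO-8601 `timestamp` strings.

```python
from datetime import datetime


def order_events(events: list[dict]) -> list[dict]:
    """Sort a received webhook batch by its timestamp field.

    Assumes ISO-8601 timestamp strings; adjust parsing to your payloads.
    """
    return sorted(events, key=lambda e: datetime.fromisoformat(e["timestamp"]))
```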

View File

@@ -26,3 +26,9 @@ ACTION_REGEX: Final[re.Pattern[str]] = re.compile(
 ACTION_INPUT_ONLY_REGEX: Final[re.Pattern[str]] = re.compile(
     r"\s*Action\s*\d*\s*Input\s*\d*\s*:\s*(.*)", re.DOTALL
 )
+# Regex to match "Action: None" or similar non-action values (None, N/A, etc.)
+# This captures the action value and any text that follows it
+ACTION_NONE_REGEX: Final[re.Pattern[str]] = re.compile(
+    r"Action\s*\d*\s*:\s*(none|n/a|na|no action|no_action)(?:\s*[-:(]?\s*(.*))?",
+    re.IGNORECASE | re.DOTALL,
+)
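For reference, a standalone check of what the new pattern captures. The pattern is copied from the diff above; the printed results are from running this sketch, not from the repo's tests.

```python
import re
from typing import Final

ACTION_NONE_REGEX: Final[re.Pattern[str]] = re.compile(
    r"Action\s*\d*\s*:\s*(none|n/a|na|no action|no_action)(?:\s*[-:(]?\s*(.*))?",
    re.IGNORECASE | re.DOTALL,
)

for sample in ("Action: None", "Action: N/A", "Action: None - direct response"):
    m = ACTION_NONE_REGEX.search(sample)
    # group(1) is the non-action token; group(2) is any trailing content
    print(m.group(1), repr(m.group(2)))
# None ''
# N/A ''
# None 'direct response'
```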

View File

@@ -278,20 +278,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                 )
                 self._invoke_step_callback(formatted_answer)  # type: ignore[arg-type]
 
-                # Properly attribute messages to avoid LLM hallucination of observations:
-                # - LLM's response goes as assistant message
-                # - Tool observation goes as user message (not assistant)
-                if isinstance(formatted_answer, AgentAction) and formatted_answer.llm_response:
-                    # For tool use: append LLM response as assistant, observation as user
-                    self._append_message(formatted_answer.llm_response)
-                    if formatted_answer.result:
-                        self._append_message(
-                            f"Observation: {formatted_answer.result}", role="user"
-                        )
-                else:
-                    # For final answer or other cases: append text as assistant
-                    self._append_message(formatted_answer.text)  # type: ignore[union-attr,attr-defined]
+                self._append_message(formatted_answer.text)  # type: ignore[union-attr,attr-defined]
             except OutputParserError as e:
                 formatted_answer = handle_output_parser_exception(  # type: ignore[assignment]
@@ -444,20 +431,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                 )
                 self._invoke_step_callback(formatted_answer)  # type: ignore[arg-type]
 
-                # Properly attribute messages to avoid LLM hallucination of observations:
-                # - LLM's response goes as assistant message
-                # - Tool observation goes as user message (not assistant)
-                if isinstance(formatted_answer, AgentAction) and formatted_answer.llm_response:
-                    # For tool use: append LLM response as assistant, observation as user
-                    self._append_message(formatted_answer.llm_response)
-                    if formatted_answer.result:
-                        self._append_message(
-                            f"Observation: {formatted_answer.result}", role="user"
-                        )
-                else:
-                    # For final answer or other cases: append text as assistant
-                    self._append_message(formatted_answer.text)  # type: ignore[union-attr,attr-defined]
+                self._append_message(formatted_answer.text)  # type: ignore[union-attr,attr-defined]
             except OutputParserError as e:
                 formatted_answer = handle_output_parser_exception(  # type: ignore[assignment]
@@ -507,18 +481,13 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
             Updated action or final answer.
         """
         # Special case for add_image_tool
-        # Note: Even for add_image_tool, we should not attribute tool output to assistant
-        # to avoid LLM hallucination. The LLM's action is stored as assistant message,
-        # and the tool result (image) is stored as user message.
         add_image_tool = self._i18n.tools("add_image")
         if (
             isinstance(add_image_tool, dict)
             and formatted_answer.tool.casefold().strip()
             == add_image_tool.get("name", "").casefold().strip()
         ):
-            # Store original LLM response for proper message attribution
-            formatted_answer.llm_response = formatted_answer.text
             formatted_answer.result = tool_result.result
+            self.messages.append({"role": "assistant", "content": tool_result.result})
             return formatted_answer
         return handle_agent_action_core(
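Net effect of the removals in this file: the executor goes back to appending the full ReAct text, observation included, as a single assistant message (`handle_agent_action_core` still appends the observation to `.text`; see its hunk below). A rough sketch of the resulting conversation shape; the content strings are hypothetical.

```python
# Hypothetical message list after one tool call under the restored behavior
messages = [
    {"role": "user", "content": "Test prompt ..."},
    {
        "role": "assistant",
        "content": (
            "Thought: I need to use a tool\n"
            "Action: test_tool\n"
            'Action Input: {"arg": "value"}\n'
            "Observation: Tool executed successfully"
        ),
    },
]
```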

View File

@@ -12,6 +12,7 @@ from json_repair import repair_json  # type: ignore[import-untyped]
 from crewai.agents.constants import (
     ACTION_INPUT_ONLY_REGEX,
     ACTION_INPUT_REGEX,
+    ACTION_NONE_REGEX,
     ACTION_REGEX,
     FINAL_ANSWER_ACTION,
     MISSING_ACTION_AFTER_THOUGHT_ERROR_MESSAGE,
@@ -33,7 +34,6 @@ class AgentAction:
     tool_input: str
     text: str
     result: str | None = None
-    llm_response: str | None = None  # Original LLM response before observation appended
 
 
 @dataclass
@@ -119,6 +119,34 @@ def parse(text: str) -> AgentAction | AgentFinish:
             thought=thought, tool=clean_action, tool_input=safe_tool_input, text=text
         )
 
+    # Check for "Action: None" or similar non-action values
+    # This handles cases where the LLM indicates it cannot/should not use a tool
+    action_none_match = ACTION_NONE_REGEX.search(text)
+    if action_none_match:
+        # Extract any additional content after "Action: None"
+        additional_content = action_none_match.group(2)
+        if additional_content:
+            additional_content = additional_content.strip()
+            # Remove trailing parenthesis if present (from patterns like "Action: None (reason)")
+            if additional_content.startswith("(") and ")" in additional_content:
+                additional_content = additional_content.split(")", 1)[-1].strip()
+            elif additional_content.startswith(")"):
+                additional_content = additional_content[1:].strip()
+
+        # Build the final answer from thought and any additional content
+        final_answer = thought
+        if additional_content:
+            if final_answer:
+                final_answer = f"{final_answer}\n\n{additional_content}"
+            else:
+                final_answer = additional_content
+
+        # If we still have no content, use a generic message
+        if not final_answer:
+            final_answer = "I cannot perform this action with the available tools."
+
+        return AgentFinish(thought=thought, output=final_answer, text=text)
+
     if not ACTION_REGEX.search(text):
         raise OutputParserError(
             f"{MISSING_ACTION_AFTER_THOUGHT_ERROR_MESSAGE}\n{_I18N.slice('final_answer_format')}",

View File

@@ -349,18 +349,8 @@ class CrewAgentExecutorFlow(Flow[AgentReActState], CrewAgentExecutorMixin):
         # Invoke step callback if configured
         self._invoke_step_callback(result)
 
-        # Properly attribute messages to avoid LLM hallucination of observations:
-        # - LLM's response goes as assistant message
-        # - Tool observation goes as user message (not assistant)
-        if isinstance(result, AgentAction) and result.llm_response:
-            # For tool use: append LLM response as assistant, observation as user
-            self._append_message_to_state(result.llm_response)
-            if result.result:
-                self._append_message_to_state(
-                    f"Observation: {result.result}", role="user"
-                )
-        elif hasattr(result, "text"):
-            # For final answer or other cases: append text as assistant
+        # Append result message to conversation state
+        if hasattr(result, "text"):
             self._append_message_to_state(result.text)
 
         # Check if tool result became a final answer (result_as_answer flag)
@@ -547,19 +537,15 @@ class CrewAgentExecutorFlow(Flow[AgentReActState], CrewAgentExecutorMixin):
         Returns:
             Updated action or final answer.
         """
         # Special case for add_image_tool
-        # Note: Even for add_image_tool, we should not attribute tool output to assistant
-        # to avoid LLM hallucination. The LLM's action is stored as assistant message,
-        # and the tool result (image) is stored as user message.
         add_image_tool = self._i18n.tools("add_image")
         if (
             isinstance(add_image_tool, dict)
             and formatted_answer.tool.casefold().strip()
             == add_image_tool.get("name", "").casefold().strip()
         ):
-            # Store original LLM response for proper message attribution
-            formatted_answer.llm_response = formatted_answer.text
             formatted_answer.result = tool_result.result
+            self.state.messages.append(
+                {"role": "assistant", "content": tool_result.result}
+            )
             return formatted_answer
         return handle_agent_action_core(

View File

@@ -582,19 +582,7 @@ class LiteAgent(FlowTrackable, BaseModel):
                     show_logs=self._show_logs,
                 )
 
-                # Properly attribute messages to avoid LLM hallucination of observations:
-                # - LLM's response goes as assistant message
-                # - Tool observation goes as user message (not assistant)
-                if isinstance(formatted_answer, AgentAction) and formatted_answer.llm_response:
-                    # For tool use: append LLM response as assistant, observation as user
-                    self._append_message(formatted_answer.llm_response, role="assistant")
-                    if formatted_answer.result:
-                        self._append_message(
-                            f"Observation: {formatted_answer.result}", role="user"
-                        )
-                else:
-                    # For final answer or other cases: append text as assistant
-                    self._append_message(formatted_answer.text, role="assistant")
+                self._append_message(formatted_answer.text, role="assistant")
             except OutputParserError as e:  # noqa: PERF203
                 self._printer.print(
                     content="Failed to parse LLM output. Retrying...",

View File

@@ -382,21 +382,10 @@ def handle_agent_action_core(
     Notes:
         - TODO: Remove messages parameter and its usage.
-        - The observation is appended to formatted_answer.text for logging/trace
-          purposes, but callers should use formatted_answer.llm_response for the
-          assistant message (without observation) and append the observation
-          separately as a user message to avoid LLM hallucination of observations.
     """
     if step_callback:
         step_callback(tool_result)
 
-    # Store the original LLM response before appending observation
-    # This is used by executors to correctly attribute messages:
-    # - llm_response goes as assistant message
-    # - observation goes as user message (to prevent LLM hallucination)
-    formatted_answer.llm_response = formatted_answer.text
-    # Append observation to text for logging/trace purposes
     formatted_answer.text += f"\nObservation: {tool_result.result}"
     formatted_answer.result = tool_result.result
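A sketch of this helper's post-revert behavior; import paths and the `ToolResult` signature are taken from the deleted test module shown below.

```python
from crewai.agents.parser import AgentAction
from crewai.tools.tool_types import ToolResult
from crewai.utilities.agent_utils import handle_agent_action_core

action = AgentAction(
    thought="I need to search",
    tool="search",
    tool_input="query",
    text="Thought: I need to search\nAction: search\nAction Input: query",
)
updated = handle_agent_action_core(
    formatted_answer=action,
    tool_result=ToolResult(result="found data", result_as_answer=False),
)
assert updated.text.endswith("Observation: found data")
assert updated.result == "found data"
# There is no llm_response attribute to inspect; the field was removed above.
```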

View File

@@ -360,3 +360,92 @@ def test_integration_valid_and_invalid():
 
 
 # TODO: ADD TEST TO MAKE SURE ** REMOVAL DOESN'T MESS UP ANYTHING
+
+
+# Tests for Action: None handling (Issue #4186)
+def test_action_none_basic():
+    """Test that 'Action: None' is parsed as AgentFinish."""
+    text = "Thought: I cannot use any tool for this.\nAction: None"
+    result = parser.parse(text)
+    assert isinstance(result, AgentFinish)
+    assert "I cannot use any tool for this." in result.output
+
+
+def test_action_none_with_reason_in_parentheses():
+    """Test 'Action: None (reason)' format."""
+    text = "Thought: The tool is not available.\nAction: None (direct response required)"
+    result = parser.parse(text)
+    assert isinstance(result, AgentFinish)
+    assert "The tool is not available." in result.output
+
+
+def test_action_none_lowercase():
+    """Test that 'Action: none' (lowercase) is handled."""
+    text = "Thought: I should respond directly.\nAction: none"
+    result = parser.parse(text)
+    assert isinstance(result, AgentFinish)
+    assert "I should respond directly." in result.output
+
+
+def test_action_na():
+    """Test that 'Action: N/A' is handled."""
+    text = "Thought: No action needed here.\nAction: N/A"
+    result = parser.parse(text)
+    assert isinstance(result, AgentFinish)
+    assert "No action needed here." in result.output
+
+
+def test_action_na_lowercase():
+    """Test that 'Action: n/a' (lowercase) is handled."""
+    text = "Thought: This requires a direct answer.\nAction: n/a"
+    result = parser.parse(text)
+    assert isinstance(result, AgentFinish)
+    assert "This requires a direct answer." in result.output
+
+
+def test_action_none_with_dash_separator():
+    """Test 'Action: None - reason' format."""
+    text = "Thought: I need to provide a direct response.\nAction: None - direct response"
+    result = parser.parse(text)
+    assert isinstance(result, AgentFinish)
+    assert "I need to provide a direct response." in result.output
+
+
+def test_action_none_with_additional_content():
+    """Test 'Action: None' with additional content after."""
+    text = "Thought: I analyzed the request.\nAction: None\nHere is my direct response to your question."
+    result = parser.parse(text)
+    assert isinstance(result, AgentFinish)
+    assert "I analyzed the request." in result.output
+
+
+def test_action_no_action():
+    """Test that 'Action: no action' is handled."""
+    text = "Thought: I will respond without using tools.\nAction: no action"
+    result = parser.parse(text)
+    assert isinstance(result, AgentFinish)
+    assert "I will respond without using tools." in result.output
+
+
+def test_action_none_without_thought():
+    """Test 'Action: None' without a thought prefix."""
+    text = "Action: None"
+    result = parser.parse(text)
+    assert isinstance(result, AgentFinish)
+    assert result.output == "I cannot perform this action with the available tools."
+
+
+def test_action_none_preserves_original_text():
+    """Test that the original text is preserved in the result."""
+    text = "Thought: I cannot delegate this task.\nAction: None"
+    result = parser.parse(text)
+    assert isinstance(result, AgentFinish)
+    assert result.text == text
+
+
+def test_action_none_with_colon_separator():
+    """Test 'Action: None: reason' format."""
+    text = "Thought: Direct response needed.\nAction: None: providing direct answer"
+    result = parser.parse(text)
+    assert isinstance(result, AgentFinish)
+    assert "Direct response needed." in result.output

View File

@@ -1,320 +0,0 @@
-"""Tests for proper message attribution to prevent LLM observation hallucination.
-
-This module tests that tool observations are correctly attributed to user messages
-rather than assistant messages, which prevents the LLM from learning to hallucinate
-fake observations during tool calls.
-
-Related to GitHub issue #4181.
-"""
-
-from typing import Any
-from unittest.mock import MagicMock, patch
-
-import pytest
-
-from crewai.agents.crew_agent_executor import CrewAgentExecutor
-from crewai.agents.parser import AgentAction, AgentFinish
-from crewai.tools.tool_types import ToolResult
-from crewai.utilities.agent_utils import handle_agent_action_core
-
-
-@pytest.fixture
-def mock_llm() -> MagicMock:
-    """Create a mock LLM for testing."""
-    llm = MagicMock()
-    llm.supports_stop_words.return_value = True
-    llm.stop = []
-    return llm
-
-
-@pytest.fixture
-def mock_agent() -> MagicMock:
-    """Create a mock agent for testing."""
-    agent = MagicMock()
-    agent.role = "Test Agent"
-    agent.key = "test_agent_key"
-    agent.verbose = False
-    agent.id = "test_agent_id"
-    return agent
-
-
-@pytest.fixture
-def mock_task() -> MagicMock:
-    """Create a mock task for testing."""
-    task = MagicMock()
-    task.description = "Test task description"
-    return task
-
-
-@pytest.fixture
-def mock_crew() -> MagicMock:
-    """Create a mock crew for testing."""
-    crew = MagicMock()
-    crew.verbose = False
-    crew._train = False
-    return crew
-
-
-@pytest.fixture
-def mock_tools_handler() -> MagicMock:
-    """Create a mock tools handler."""
-    return MagicMock()
-
-
-@pytest.fixture
-def executor(
-    mock_llm: MagicMock,
-    mock_agent: MagicMock,
-    mock_task: MagicMock,
-    mock_crew: MagicMock,
-    mock_tools_handler: MagicMock,
-) -> CrewAgentExecutor:
-    """Create a CrewAgentExecutor instance for testing."""
-    return CrewAgentExecutor(
-        llm=mock_llm,
-        task=mock_task,
-        crew=mock_crew,
-        agent=mock_agent,
-        prompt={"prompt": "Test prompt {input} {tool_names} {tools}"},
-        max_iter=5,
-        tools=[],
-        tools_names="",
-        stop_words=["Observation:"],
-        tools_description="",
-        tools_handler=mock_tools_handler,
-    )
-
-
-class TestHandleAgentActionCore:
-    """Tests for handle_agent_action_core function."""
-
-    def test_stores_llm_response_before_observation(self) -> None:
-        """Test that llm_response is stored before observation is appended."""
-        original_text = "Thought: I need to search\nAction: search\nAction Input: query"
-        action = AgentAction(
-            thought="I need to search",
-            tool="search",
-            tool_input="query",
-            text=original_text,
-        )
-        tool_result = ToolResult(result="Search result: found data", result_as_answer=False)
-
-        result = handle_agent_action_core(
-            formatted_answer=action,
-            tool_result=tool_result,
-        )
-
-        assert isinstance(result, AgentAction)
-        assert result.llm_response == original_text
-        assert "Observation:" in result.text
-        assert result.result == "Search result: found data"
-
-    def test_text_contains_observation_for_logging(self) -> None:
-        """Test that text contains observation for logging purposes."""
-        action = AgentAction(
-            thought="Testing",
-            tool="test_tool",
-            tool_input="{}",
-            text="Thought: Testing\nAction: test_tool\nAction Input: {}",
-        )
-        tool_result = ToolResult(result="Tool output", result_as_answer=False)
-
-        result = handle_agent_action_core(
-            formatted_answer=action,
-            tool_result=tool_result,
-        )
-
-        assert isinstance(result, AgentAction)
-        assert "Observation: Tool output" in result.text
-
-    def test_result_as_answer_returns_agent_finish(self) -> None:
-        """Test that result_as_answer=True returns AgentFinish."""
-        action = AgentAction(
-            thought="Using tool",
-            tool="final_tool",
-            tool_input="{}",
-            text="Thought: Using tool\nAction: final_tool\nAction Input: {}",
-        )
-        tool_result = ToolResult(result="Final answer from tool", result_as_answer=True)
-
-        result = handle_agent_action_core(
-            formatted_answer=action,
-            tool_result=tool_result,
-        )
-
-        assert isinstance(result, AgentFinish)
-        assert result.output == "Final answer from tool"
-
-
-class TestCrewAgentExecutorMessageAttribution:
-    """Tests for proper message attribution in CrewAgentExecutor."""
-
-    def test_tool_observation_not_in_assistant_message(
-        self, executor: CrewAgentExecutor
-    ) -> None:
-        """Test that tool observations are not attributed to assistant messages.
-
-        This is the core fix for GitHub issue #4181 - observations should be
-        in user messages, not assistant messages, to prevent LLM hallucination.
-        """
-        call_count = 0
-
-        def mock_llm_response(*args: Any, **kwargs: Any) -> str:
-            nonlocal call_count
-            call_count += 1
-            if call_count == 1:
-                return (
-                    "Thought: I need to use a tool\n"
-                    "Action: test_tool\n"
-                    'Action Input: {"arg": "value"}'
-                )
-            return "Thought: I have the answer\nFinal Answer: Done"
-
-        with patch(
-            "crewai.agents.crew_agent_executor.get_llm_response",
-            side_effect=mock_llm_response,
-        ):
-            with patch(
-                "crewai.agents.crew_agent_executor.execute_tool_and_check_finality",
-                return_value=ToolResult(
-                    result="Tool executed successfully", result_as_answer=False
-                ),
-            ):
-                with patch.object(executor, "_show_logs"):
-                    result = executor._invoke_loop()
-
-        assert isinstance(result, AgentFinish)
-
-        assistant_messages = [
-            msg for msg in executor.messages if msg.get("role") == "assistant"
-        ]
-        user_messages = [msg for msg in executor.messages if msg.get("role") == "user"]
-
-        for msg in assistant_messages:
-            content = msg.get("content", "")
-            assert "Observation:" not in content, (
-                f"Assistant message should not contain 'Observation:'. "
-                f"Found: {content[:100]}..."
-            )
-
-        observation_in_user = any(
-            "Observation:" in msg.get("content", "") for msg in user_messages
-        )
-        assert observation_in_user, (
-            "Tool observation should be in a user message, not assistant message"
-        )
-
-    def test_llm_response_in_assistant_message(
-        self, executor: CrewAgentExecutor
-    ) -> None:
-        """Test that the LLM's actual response is in assistant messages."""
-        call_count = 0
-        llm_action_text = (
-            "Thought: I need to use a tool\n"
-            "Action: test_tool\n"
-            'Action Input: {"arg": "value"}'
-        )
-
-        def mock_llm_response(*args: Any, **kwargs: Any) -> str:
-            nonlocal call_count
-            call_count += 1
-            if call_count == 1:
-                return llm_action_text
-            return "Thought: I have the answer\nFinal Answer: Done"
-
-        with patch(
-            "crewai.agents.crew_agent_executor.get_llm_response",
-            side_effect=mock_llm_response,
-        ):
-            with patch(
-                "crewai.agents.crew_agent_executor.execute_tool_and_check_finality",
-                return_value=ToolResult(
-                    result="Tool executed successfully", result_as_answer=False
-                ),
-            ):
-                with patch.object(executor, "_show_logs"):
-                    executor._invoke_loop()
-
-        assistant_messages = [
-            msg for msg in executor.messages if msg.get("role") == "assistant"
-        ]
-        llm_response_found = any(
-            "Action: test_tool" in msg.get("content", "") for msg in assistant_messages
-        )
-        assert llm_response_found, (
-            "LLM's action response should be in an assistant message"
-        )
-
-    def test_message_order_after_tool_use(
-        self, executor: CrewAgentExecutor
-    ) -> None:
-        """Test that messages are in correct order: assistant (action) then user (observation)."""
-        call_count = 0
-
-        def mock_llm_response(*args: Any, **kwargs: Any) -> str:
-            nonlocal call_count
-            call_count += 1
-            if call_count == 1:
-                return (
-                    "Thought: I need to use a tool\n"
-                    "Action: test_tool\n"
-                    'Action Input: {"arg": "value"}'
-                )
-            return "Thought: I have the answer\nFinal Answer: Done"
-
-        with patch(
-            "crewai.agents.crew_agent_executor.get_llm_response",
-            side_effect=mock_llm_response,
-        ):
-            with patch(
-                "crewai.agents.crew_agent_executor.execute_tool_and_check_finality",
-                return_value=ToolResult(
-                    result="Tool executed successfully", result_as_answer=False
-                ),
-            ):
-                with patch.object(executor, "_show_logs"):
-                    executor._invoke_loop()
-
-        action_msg_idx = None
-        observation_msg_idx = None
-        for i, msg in enumerate(executor.messages):
-            content = msg.get("content", "")
-            if "Action: test_tool" in content and msg.get("role") == "assistant":
-                action_msg_idx = i
-            if "Observation:" in content and msg.get("role") == "user":
-                observation_msg_idx = i
-
-        assert action_msg_idx is not None, "Action message not found"
-        assert observation_msg_idx is not None, "Observation message not found"
-        assert observation_msg_idx == action_msg_idx + 1, (
-            f"Observation (user) should immediately follow action (assistant). "
-            f"Action at {action_msg_idx}, Observation at {observation_msg_idx}"
-        )
-
-
-class TestAgentActionLlmResponseField:
-    """Tests for the llm_response field on AgentAction."""
-
-    def test_agent_action_has_llm_response_field(self) -> None:
-        """Test that AgentAction has llm_response field."""
-        action = AgentAction(
-            thought="Test",
-            tool="test",
-            tool_input="{}",
-            text="Test text",
-        )
-        assert hasattr(action, "llm_response")
-        assert action.llm_response is None
-
-    def test_agent_action_llm_response_can_be_set(self) -> None:
-        """Test that llm_response can be set on AgentAction."""
-        action = AgentAction(
-            thought="Test",
-            tool="test",
-            tool_input="{}",
-            text="Test text",
-        )
-        action.llm_response = "Original LLM response"
-        assert action.llm_response == "Original LLM response"