Mirror of https://github.com/crewAIInc/crewAI.git, synced 2026-01-18 12:28:15 +00:00

Compare commits: devin/1768...devin/1767 (1 commit)

| Author | SHA1 | Date |
|---|---|---|
| | 83b07b9d23 | |
```diff
@@ -278,7 +278,20 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                 )

                 self._invoke_step_callback(formatted_answer)  # type: ignore[arg-type]
-                self._append_message(formatted_answer.text)  # type: ignore[union-attr,attr-defined]
+
+                # Properly attribute messages to avoid LLM hallucination of observations:
+                # - LLM's response goes as assistant message
+                # - Tool observation goes as user message (not assistant)
+                if isinstance(formatted_answer, AgentAction) and formatted_answer.llm_response:
+                    # For tool use: append LLM response as assistant, observation as user
+                    self._append_message(formatted_answer.llm_response)
+                    if formatted_answer.result:
+                        self._append_message(
+                            f"Observation: {formatted_answer.result}", role="user"
+                        )
+                else:
+                    # For final answer or other cases: append text as assistant
+                    self._append_message(formatted_answer.text)  # type: ignore[union-attr,attr-defined]

             except OutputParserError as e:
                 formatted_answer = handle_output_parser_exception(  # type: ignore[assignment]
```
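To make the intended message shape concrete, here is a minimal sketch (not crewAI source; the task and tool contents are hypothetical) of what the conversation history looks like after one tool call under this attribution scheme:

```python
# Sketch of the expected message history after one tool call.
# All string contents below are hypothetical illustrations.
messages = [
    {"role": "user", "content": "Task: find the population of Paris"},
    # The raw LLM output (formatted_answer.llm_response) is appended as an
    # assistant message, *without* the observation...
    {
        "role": "assistant",
        "content": 'Thought: I need to search\nAction: search\nAction Input: {"q": "Paris population"}',
    },
    # ...and the tool observation follows as a user message, so the model never
    # sees observations as text it previously generated itself.
    {"role": "user", "content": "Observation: 2.1 million (2023 estimate)"},
]
```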
The same change is applied at the second append site in the executor:

```diff
@@ -431,7 +444,20 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                 )

                 self._invoke_step_callback(formatted_answer)  # type: ignore[arg-type]
-                self._append_message(formatted_answer.text)  # type: ignore[union-attr,attr-defined]
+
+                # Properly attribute messages to avoid LLM hallucination of observations:
+                # - LLM's response goes as assistant message
+                # - Tool observation goes as user message (not assistant)
+                if isinstance(formatted_answer, AgentAction) and formatted_answer.llm_response:
+                    # For tool use: append LLM response as assistant, observation as user
+                    self._append_message(formatted_answer.llm_response)
+                    if formatted_answer.result:
+                        self._append_message(
+                            f"Observation: {formatted_answer.result}", role="user"
+                        )
+                else:
+                    # For final answer or other cases: append text as assistant
+                    self._append_message(formatted_answer.text)  # type: ignore[union-attr,attr-defined]

             except OutputParserError as e:
                 formatted_answer = handle_output_parser_exception(  # type: ignore[assignment]
```
```diff
@@ -481,13 +507,18 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
             Updated action or final answer.
         """
         # Special case for add_image_tool
+        # Note: Even for add_image_tool, we should not attribute tool output to assistant
+        # to avoid LLM hallucination. The LLM's action is stored as assistant message,
+        # and the tool result (image) is stored as user message.
         add_image_tool = self._i18n.tools("add_image")
         if (
             isinstance(add_image_tool, dict)
             and formatted_answer.tool.casefold().strip()
             == add_image_tool.get("name", "").casefold().strip()
         ):
             self.messages.append({"role": "assistant", "content": tool_result.result})
+            # Store original LLM response for proper message attribution
+            formatted_answer.llm_response = formatted_answer.text
             formatted_answer.result = tool_result.result
             return formatted_answer

         return handle_agent_action_core(
```
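As an aside, the `casefold().strip()` pair above makes the tool-name match case- and whitespace-insensitive; a standalone sketch of that comparison (values are hypothetical):

```python
# Standalone sketch of the tool-name matching used above (hypothetical values).
add_image_tool = {"name": "Add Image"}

def is_add_image(tool_name: str) -> bool:
    """Case- and surrounding-whitespace-insensitive match against the configured name."""
    return (
        tool_name.casefold().strip()
        == add_image_tool.get("name", "").casefold().strip()
    )

assert is_add_image("  add image ")
assert not is_add_image("search")
```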
The `AgentAction` dataclass gains a field to hold the pristine LLM output:

```diff
@@ -33,6 +33,7 @@ class AgentAction:
     tool_input: str
     text: str
     result: str | None = None
+    llm_response: str | None = None  # Original LLM response before observation appended


 @dataclass
```
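A short sketch of how the new field is meant to be used downstream (constructor arguments mirror the fields above; the values are hypothetical):

```python
from crewai.agents.parser import AgentAction

action = AgentAction(
    thought="I need to search",
    tool="search",
    tool_input='{"q": "weather"}',
    text='Thought: I need to search\nAction: search\nAction Input: {"q": "weather"}',
)

# Before any tool runs, llm_response defaults to None.
assert action.llm_response is None

# handle_agent_action_core later copies the pristine LLM output here, while
# action.text additionally gains "\nObservation: ..." for logging/traces.
```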
The flow-based executor gets the same attribution logic:

```diff
@@ -349,8 +349,18 @@ class CrewAgentExecutorFlow(Flow[AgentReActState], CrewAgentExecutorMixin):
             # Invoke step callback if configured
             self._invoke_step_callback(result)

             # Append result message to conversation state
-            if hasattr(result, "text"):
+            # Properly attribute messages to avoid LLM hallucination of observations:
+            # - LLM's response goes as assistant message
+            # - Tool observation goes as user message (not assistant)
+            if isinstance(result, AgentAction) and result.llm_response:
+                # For tool use: append LLM response as assistant, observation as user
+                self._append_message_to_state(result.llm_response)
+                if result.result:
+                    self._append_message_to_state(
+                        f"Observation: {result.result}", role="user"
+                    )
+            elif hasattr(result, "text"):
+                # For final answer or other cases: append text as assistant
                 self._append_message_to_state(result.text)

             # Check if tool result became a final answer (result_as_answer flag)
```
```diff
@@ -537,15 +547,19 @@ class CrewAgentExecutorFlow(Flow[AgentReActState], CrewAgentExecutorMixin):
         Returns:
             Updated action or final answer.
         """
         # Special case for add_image_tool
+        # Note: Even for add_image_tool, we should not attribute tool output to assistant
+        # to avoid LLM hallucination. The LLM's action is stored as assistant message,
+        # and the tool result (image) is stored as user message.
         add_image_tool = self._i18n.tools("add_image")
         if (
             isinstance(add_image_tool, dict)
             and formatted_answer.tool.casefold().strip()
             == add_image_tool.get("name", "").casefold().strip()
         ):
             self.state.messages.append(
                 {"role": "assistant", "content": tool_result.result}
             )
+            # Store original LLM response for proper message attribution
+            formatted_answer.llm_response = formatted_answer.text
             formatted_answer.result = tool_result.result
             return formatted_answer

         return handle_agent_action_core(
```
`LiteAgent` receives the equivalent fix, with explicit roles on both appends:

```diff
@@ -582,7 +582,19 @@ class LiteAgent(FlowTrackable, BaseModel):
                     show_logs=self._show_logs,
                 )

-                self._append_message(formatted_answer.text, role="assistant")
+                # Properly attribute messages to avoid LLM hallucination of observations:
+                # - LLM's response goes as assistant message
+                # - Tool observation goes as user message (not assistant)
+                if isinstance(formatted_answer, AgentAction) and formatted_answer.llm_response:
+                    # For tool use: append LLM response as assistant, observation as user
+                    self._append_message(formatted_answer.llm_response, role="assistant")
+                    if formatted_answer.result:
+                        self._append_message(
+                            f"Observation: {formatted_answer.result}", role="user"
+                        )
+                else:
+                    # For final answer or other cases: append text as assistant
+                    self._append_message(formatted_answer.text, role="assistant")
             except OutputParserError as e:  # noqa: PERF203
                 self._printer.print(
                     content="Failed to parse LLM output. Retrying...",
```
Finally, the shared helper stores the raw response before mutating the text:

```diff
@@ -382,10 +382,21 @@ def handle_agent_action_core(

     Notes:
         - TODO: Remove messages parameter and its usage.
+        - The observation is appended to formatted_answer.text for logging/trace
+          purposes, but callers should use formatted_answer.llm_response for the
+          assistant message (without observation) and append the observation
+          separately as a user message to avoid LLM hallucination of observations.
     """
     if step_callback:
         step_callback(tool_result)

+    # Store the original LLM response before appending observation
+    # This is used by executors to correctly attribute messages:
+    # - llm_response goes as assistant message
+    # - observation goes as user message (to prevent LLM hallucination)
+    formatted_answer.llm_response = formatted_answer.text
+
+    # Append observation to text for logging/trace purposes
     formatted_answer.text += f"\nObservation: {tool_result.result}"
     formatted_answer.result = tool_result.result
```
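In caller terms, the contract documented above boils down to: use `llm_response` for the assistant message and emit the observation separately as a user message. A minimal sketch of a conforming caller (the helper name is hypothetical, not crewAI source):

```python
# Hypothetical caller-side helper illustrating the documented contract.
def append_action_messages(messages: list[dict[str, str]], formatted_answer) -> None:
    """Append the LLM response as assistant and the observation as user."""
    # Assistant message: the raw LLM output, without the observation appended.
    messages.append({"role": "assistant", "content": formatted_answer.llm_response})
    # User message: the observation, re-derived from formatted_answer.result.
    if formatted_answer.result:
        messages.append(
            {"role": "user", "content": f"Observation: {formatted_answer.result}"}
        )
```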
lib/crewai/tests/agents/test_observation_message_attribution.py (new file, 320 lines)

@@ -0,0 +1,320 @@
```python
"""Tests for proper message attribution to prevent LLM observation hallucination.

This module tests that tool observations are correctly attributed to user messages
rather than assistant messages, which prevents the LLM from learning to hallucinate
fake observations during tool calls.

Related to GitHub issue #4181.
"""

from typing import Any
from unittest.mock import MagicMock, patch

import pytest

from crewai.agents.crew_agent_executor import CrewAgentExecutor
from crewai.agents.parser import AgentAction, AgentFinish
from crewai.tools.tool_types import ToolResult
from crewai.utilities.agent_utils import handle_agent_action_core


@pytest.fixture
def mock_llm() -> MagicMock:
    """Create a mock LLM for testing."""
    llm = MagicMock()
    llm.supports_stop_words.return_value = True
    llm.stop = []
    return llm


@pytest.fixture
def mock_agent() -> MagicMock:
    """Create a mock agent for testing."""
    agent = MagicMock()
    agent.role = "Test Agent"
    agent.key = "test_agent_key"
    agent.verbose = False
    agent.id = "test_agent_id"
    return agent


@pytest.fixture
def mock_task() -> MagicMock:
    """Create a mock task for testing."""
    task = MagicMock()
    task.description = "Test task description"
    return task


@pytest.fixture
def mock_crew() -> MagicMock:
    """Create a mock crew for testing."""
    crew = MagicMock()
    crew.verbose = False
    crew._train = False
    return crew


@pytest.fixture
def mock_tools_handler() -> MagicMock:
    """Create a mock tools handler."""
    return MagicMock()


@pytest.fixture
def executor(
    mock_llm: MagicMock,
    mock_agent: MagicMock,
    mock_task: MagicMock,
    mock_crew: MagicMock,
    mock_tools_handler: MagicMock,
) -> CrewAgentExecutor:
    """Create a CrewAgentExecutor instance for testing."""
    return CrewAgentExecutor(
        llm=mock_llm,
        task=mock_task,
        crew=mock_crew,
        agent=mock_agent,
        prompt={"prompt": "Test prompt {input} {tool_names} {tools}"},
        max_iter=5,
        tools=[],
        tools_names="",
        stop_words=["Observation:"],
        tools_description="",
        tools_handler=mock_tools_handler,
    )


class TestHandleAgentActionCore:
    """Tests for handle_agent_action_core function."""

    def test_stores_llm_response_before_observation(self) -> None:
        """Test that llm_response is stored before observation is appended."""
        original_text = "Thought: I need to search\nAction: search\nAction Input: query"
        action = AgentAction(
            thought="I need to search",
            tool="search",
            tool_input="query",
            text=original_text,
        )
        tool_result = ToolResult(result="Search result: found data", result_as_answer=False)

        result = handle_agent_action_core(
            formatted_answer=action,
            tool_result=tool_result,
        )

        assert isinstance(result, AgentAction)
        assert result.llm_response == original_text
        assert "Observation:" in result.text
        assert result.result == "Search result: found data"

    def test_text_contains_observation_for_logging(self) -> None:
        """Test that text contains observation for logging purposes."""
        action = AgentAction(
            thought="Testing",
            tool="test_tool",
            tool_input="{}",
            text="Thought: Testing\nAction: test_tool\nAction Input: {}",
        )
        tool_result = ToolResult(result="Tool output", result_as_answer=False)

        result = handle_agent_action_core(
            formatted_answer=action,
            tool_result=tool_result,
        )

        assert isinstance(result, AgentAction)
        assert "Observation: Tool output" in result.text

    def test_result_as_answer_returns_agent_finish(self) -> None:
        """Test that result_as_answer=True returns AgentFinish."""
        action = AgentAction(
            thought="Using tool",
            tool="final_tool",
            tool_input="{}",
            text="Thought: Using tool\nAction: final_tool\nAction Input: {}",
        )
        tool_result = ToolResult(result="Final answer from tool", result_as_answer=True)

        result = handle_agent_action_core(
            formatted_answer=action,
            tool_result=tool_result,
        )

        assert isinstance(result, AgentFinish)
        assert result.output == "Final answer from tool"


class TestCrewAgentExecutorMessageAttribution:
    """Tests for proper message attribution in CrewAgentExecutor."""

    def test_tool_observation_not_in_assistant_message(
        self, executor: CrewAgentExecutor
    ) -> None:
        """Test that tool observations are not attributed to assistant messages.

        This is the core fix for GitHub issue #4181 - observations should be
        in user messages, not assistant messages, to prevent LLM hallucination.
        """
        call_count = 0

        def mock_llm_response(*args: Any, **kwargs: Any) -> str:
            nonlocal call_count
            call_count += 1
            if call_count == 1:
                return (
                    "Thought: I need to use a tool\n"
                    "Action: test_tool\n"
                    'Action Input: {"arg": "value"}'
                )
            return "Thought: I have the answer\nFinal Answer: Done"

        with patch(
            "crewai.agents.crew_agent_executor.get_llm_response",
            side_effect=mock_llm_response,
        ):
            with patch(
                "crewai.agents.crew_agent_executor.execute_tool_and_check_finality",
                return_value=ToolResult(
                    result="Tool executed successfully", result_as_answer=False
                ),
            ):
                with patch.object(executor, "_show_logs"):
                    result = executor._invoke_loop()

        assert isinstance(result, AgentFinish)

        assistant_messages = [
            msg for msg in executor.messages if msg.get("role") == "assistant"
        ]
        user_messages = [msg for msg in executor.messages if msg.get("role") == "user"]

        for msg in assistant_messages:
            content = msg.get("content", "")
            assert "Observation:" not in content, (
                f"Assistant message should not contain 'Observation:'. "
                f"Found: {content[:100]}..."
            )

        observation_in_user = any(
            "Observation:" in msg.get("content", "") for msg in user_messages
        )
        assert observation_in_user, (
            "Tool observation should be in a user message, not assistant message"
        )

    def test_llm_response_in_assistant_message(
        self, executor: CrewAgentExecutor
    ) -> None:
        """Test that the LLM's actual response is in assistant messages."""
        call_count = 0
        llm_action_text = (
            "Thought: I need to use a tool\n"
            "Action: test_tool\n"
            'Action Input: {"arg": "value"}'
        )

        def mock_llm_response(*args: Any, **kwargs: Any) -> str:
            nonlocal call_count
            call_count += 1
            if call_count == 1:
                return llm_action_text
            return "Thought: I have the answer\nFinal Answer: Done"

        with patch(
            "crewai.agents.crew_agent_executor.get_llm_response",
            side_effect=mock_llm_response,
        ):
            with patch(
                "crewai.agents.crew_agent_executor.execute_tool_and_check_finality",
                return_value=ToolResult(
                    result="Tool executed successfully", result_as_answer=False
                ),
            ):
                with patch.object(executor, "_show_logs"):
                    executor._invoke_loop()

        assistant_messages = [
            msg for msg in executor.messages if msg.get("role") == "assistant"
        ]

        llm_response_found = any(
            "Action: test_tool" in msg.get("content", "") for msg in assistant_messages
        )
        assert llm_response_found, (
            "LLM's action response should be in an assistant message"
        )

    def test_message_order_after_tool_use(
        self, executor: CrewAgentExecutor
    ) -> None:
        """Test that messages are in correct order: assistant (action) then user (observation)."""
        call_count = 0

        def mock_llm_response(*args: Any, **kwargs: Any) -> str:
            nonlocal call_count
            call_count += 1
            if call_count == 1:
                return (
                    "Thought: I need to use a tool\n"
                    "Action: test_tool\n"
                    'Action Input: {"arg": "value"}'
                )
            return "Thought: I have the answer\nFinal Answer: Done"

        with patch(
            "crewai.agents.crew_agent_executor.get_llm_response",
            side_effect=mock_llm_response,
        ):
            with patch(
                "crewai.agents.crew_agent_executor.execute_tool_and_check_finality",
                return_value=ToolResult(
                    result="Tool executed successfully", result_as_answer=False
                ),
            ):
                with patch.object(executor, "_show_logs"):
                    executor._invoke_loop()

        action_msg_idx = None
        observation_msg_idx = None

        for i, msg in enumerate(executor.messages):
            content = msg.get("content", "")
            if "Action: test_tool" in content and msg.get("role") == "assistant":
                action_msg_idx = i
            if "Observation:" in content and msg.get("role") == "user":
                observation_msg_idx = i

        assert action_msg_idx is not None, "Action message not found"
        assert observation_msg_idx is not None, "Observation message not found"
        assert observation_msg_idx == action_msg_idx + 1, (
            f"Observation (user) should immediately follow action (assistant). "
            f"Action at {action_msg_idx}, Observation at {observation_msg_idx}"
        )


class TestAgentActionLlmResponseField:
    """Tests for the llm_response field on AgentAction."""

    def test_agent_action_has_llm_response_field(self) -> None:
        """Test that AgentAction has llm_response field."""
        action = AgentAction(
            thought="Test",
            tool="test",
            tool_input="{}",
            text="Test text",
        )
        assert hasattr(action, "llm_response")
        assert action.llm_response is None

    def test_agent_action_llm_response_can_be_set(self) -> None:
        """Test that llm_response can be set on AgentAction."""
        action = AgentAction(
            thought="Test",
            tool="test",
            tool_input="{}",
            text="Test text",
        )
        action.llm_response = "Original LLM response"
        assert action.llm_response == "Original LLM response"
```
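Assuming a checkout with dev dependencies installed, the new suite can be run directly, e.g. `pytest lib/crewai/tests/agents/test_observation_message_attribution.py`.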