fix: prevent LLM observation hallucination by properly attributing tool results

Fixes #4181 The issue was that tool observations were being appended to the assistant message in the conversation history, which caused the LLM to learn to hallucinate fake observations during tool calls. Changes: - Add llm_response field to AgentAction to store the original LLM response before observation is appended - Modify handle_agent_action_core to store llm_response before appending observation to text (text still contains observation for logging) - Update CrewAgentExecutor._invoke_loop and _ainvoke_loop to: - Append LLM response as assistant message - Append observation as user message (not assistant) - Apply same fix to LiteAgent._invoke_loop - Apply same fix to CrewAgentExecutorFlow.execute_tool_action - Fix add_image_tool special case in both executors to use same pattern - Add comprehensive tests for proper message attribution Co-Authored-By: João <joao@crewai.com>
2026-01-24 15:48:23 +00:00 · 2026-01-06 06:57:16 +00:00
9 changed files with 399 additions and 134 deletions
--- a/docs/en/enterprise/features/webhook-streaming.mdx
+++ b/docs/en/enterprise/features/webhook-streaming.mdx
@@ -55,7 +55,7 @@ Each webhook sends a list of events:
 }
 ```

-The `data` object structure varies by event type. Refer to the [event list](https://github.com/crewAIInc/crewAI/tree/main/lib/crewai/src/crewai/events/types) on GitHub.
+The `data` object structure varies by event type. Refer to the [event list](https://github.com/crewAIInc/crewAI/tree/main/src/crewai/utilities/events) on GitHub.

 As requests are sent over HTTP, the order of events can't be guaranteed. If you need ordering, use the `timestamp` field.

--- a/lib/crewai/src/crewai/agents/constants.py
+++ b/lib/crewai/src/crewai/agents/constants.py
@@ -26,9 +26,3 @@ ACTION_REGEX: Final[re.Pattern[str]] = re.compile(
 ACTION_INPUT_ONLY_REGEX: Final[re.Pattern[str]] = re.compile(
    r"\s*Action\s*\d*\s*Input\s*\d*\s*:\s*(.*)", re.DOTALL
 )
-# Regex to match "Action: None" or similar non-action values (None, N/A, etc.)
-# This captures the action value and any text that follows it
-ACTION_NONE_REGEX: Final[re.Pattern[str]] = re.compile(
-    r"Action\s*\d*\s*:\s*(none|n/a|na|no action|no_action)(?:\s*[-:(]?\s*(.*))?",
-    re.IGNORECASE | re.DOTALL,
-)
--- a/lib/crewai/src/crewai/agents/crew_agent_executor.py
+++ b/lib/crewai/src/crewai/agents/crew_agent_executor.py
@@ -278,7 +278,20 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                    )

                self._invoke_step_callback(formatted_answer)  # type: ignore[arg-type]
-                self._append_message(formatted_answer.text)  # type: ignore[union-attr,attr-defined]
+
+                # Properly attribute messages to avoid LLM hallucination of observations:
+                # - LLM's response goes as assistant message
+                # - Tool observation goes as user message (not assistant)
+                if isinstance(formatted_answer, AgentAction) and formatted_answer.llm_response:
+                    # For tool use: append LLM response as assistant, observation as user
+                    self._append_message(formatted_answer.llm_response)
+                    if formatted_answer.result:
+                        self._append_message(
+                            f"Observation: {formatted_answer.result}", role="user"
+                        )
+                else:
+                    # For final answer or other cases: append text as assistant
+                    self._append_message(formatted_answer.text)  # type: ignore[union-attr,attr-defined]

            except OutputParserError as e:
                formatted_answer = handle_output_parser_exception(  # type: ignore[assignment]
@@ -431,7 +444,20 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                    )

                self._invoke_step_callback(formatted_answer)  # type: ignore[arg-type]
-                self._append_message(formatted_answer.text)  # type: ignore[union-attr,attr-defined]
+
+                # Properly attribute messages to avoid LLM hallucination of observations:
+                # - LLM's response goes as assistant message
+                # - Tool observation goes as user message (not assistant)
+                if isinstance(formatted_answer, AgentAction) and formatted_answer.llm_response:
+                    # For tool use: append LLM response as assistant, observation as user
+                    self._append_message(formatted_answer.llm_response)
+                    if formatted_answer.result:
+                        self._append_message(
+                            f"Observation: {formatted_answer.result}", role="user"
+                        )
+                else:
+                    # For final answer or other cases: append text as assistant
+                    self._append_message(formatted_answer.text)  # type: ignore[union-attr,attr-defined]

            except OutputParserError as e:
                formatted_answer = handle_output_parser_exception(  # type: ignore[assignment]
@@ -481,13 +507,18 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
            Updated action or final answer.
        """
        # Special case for add_image_tool
+        # Note: Even for add_image_tool, we should not attribute tool output to assistant
+        # to avoid LLM hallucination. The LLM's action is stored as assistant message,
+        # and the tool result (image) is stored as user message.
        add_image_tool = self._i18n.tools("add_image")
        if (
            isinstance(add_image_tool, dict)
            and formatted_answer.tool.casefold().strip()
            == add_image_tool.get("name", "").casefold().strip()
        ):
-            self.messages.append({"role": "assistant", "content": tool_result.result})
+            # Store original LLM response for proper message attribution
+            formatted_answer.llm_response = formatted_answer.text
+            formatted_answer.result = tool_result.result
            return formatted_answer

        return handle_agent_action_core(
--- a/lib/crewai/src/crewai/agents/parser.py
+++ b/lib/crewai/src/crewai/agents/parser.py
@@ -12,7 +12,6 @@ from json_repair import repair_json  # type: ignore[import-untyped]
 from crewai.agents.constants import (
    ACTION_INPUT_ONLY_REGEX,
    ACTION_INPUT_REGEX,
-    ACTION_NONE_REGEX,
    ACTION_REGEX,
    FINAL_ANSWER_ACTION,
    MISSING_ACTION_AFTER_THOUGHT_ERROR_MESSAGE,
@@ -34,6 +33,7 @@ class AgentAction:
    tool_input: str
    text: str
    result: str | None = None
+    llm_response: str | None = None  # Original LLM response before observation appended


@dataclass
@@ -119,34 +119,6 @@ def parse(text: str) -> AgentAction | AgentFinish:
            thought=thought, tool=clean_action, tool_input=safe_tool_input, text=text
        )

-    # Check for "Action: None" or similar non-action values
-    # This handles cases where the LLM indicates it cannot/should not use a tool
-    action_none_match = ACTION_NONE_REGEX.search(text)
-    if action_none_match:
-        # Extract any additional content after "Action: None"
-        additional_content = action_none_match.group(2)
-        if additional_content:
-            additional_content = additional_content.strip()
-            # Remove trailing parenthesis if present (from patterns like "Action: None (reason)")
-            if additional_content.startswith("(") and ")" in additional_content:
-                additional_content = additional_content.split(")", 1)[-1].strip()
-            elif additional_content.startswith(")"):
-                additional_content = additional_content[1:].strip()
-
-        # Build the final answer from thought and any additional content
-        final_answer = thought
-        if additional_content:
-            if final_answer:
-                final_answer = f"{final_answer}\n\n{additional_content}"
-            else:
-                final_answer = additional_content
-
-        # If we still have no content, use a generic message
-        if not final_answer:
-            final_answer = "I cannot perform this action with the available tools."
-
-        return AgentFinish(thought=thought, output=final_answer, text=text)
-
    if not ACTION_REGEX.search(text):
        raise OutputParserError(
            f"{MISSING_ACTION_AFTER_THOUGHT_ERROR_MESSAGE}\n{_I18N.slice('final_answer_format')}",
--- a/lib/crewai/src/crewai/experimental/crew_agent_executor_flow.py
+++ b/lib/crewai/src/crewai/experimental/crew_agent_executor_flow.py
@@ -349,8 +349,18 @@ class CrewAgentExecutorFlow(Flow[AgentReActState], CrewAgentExecutorMixin):
            # Invoke step callback if configured
            self._invoke_step_callback(result)

-            # Append result message to conversation state
-            if hasattr(result, "text"):
+            # Properly attribute messages to avoid LLM hallucination of observations:
+            # - LLM's response goes as assistant message
+            # - Tool observation goes as user message (not assistant)
+            if isinstance(result, AgentAction) and result.llm_response:
+                # For tool use: append LLM response as assistant, observation as user
+                self._append_message_to_state(result.llm_response)
+                if result.result:
+                    self._append_message_to_state(
+                        f"Observation: {result.result}", role="user"
+                    )
+            elif hasattr(result, "text"):
+                # For final answer or other cases: append text as assistant
                self._append_message_to_state(result.text)

            # Check if tool result became a final answer (result_as_answer flag)
@@ -537,15 +547,19 @@ class CrewAgentExecutorFlow(Flow[AgentReActState], CrewAgentExecutorMixin):
        Returns:
            Updated action or final answer.
        """
+        # Special case for add_image_tool
+        # Note: Even for add_image_tool, we should not attribute tool output to assistant
+        # to avoid LLM hallucination. The LLM's action is stored as assistant message,
+        # and the tool result (image) is stored as user message.
        add_image_tool = self._i18n.tools("add_image")
        if (
            isinstance(add_image_tool, dict)
            and formatted_answer.tool.casefold().strip()
            == add_image_tool.get("name", "").casefold().strip()
        ):
-            self.state.messages.append(
-                {"role": "assistant", "content": tool_result.result}
-            )
+            # Store original LLM response for proper message attribution
+            formatted_answer.llm_response = formatted_answer.text
+            formatted_answer.result = tool_result.result
            return formatted_answer

        return handle_agent_action_core(
--- a/lib/crewai/src/crewai/lite_agent.py
+++ b/lib/crewai/src/crewai/lite_agent.py
@@ -582,7 +582,19 @@ class LiteAgent(FlowTrackable, BaseModel):
                        show_logs=self._show_logs,
                    )

-                self._append_message(formatted_answer.text, role="assistant")
+                # Properly attribute messages to avoid LLM hallucination of observations:
+                # - LLM's response goes as assistant message
+                # - Tool observation goes as user message (not assistant)
+                if isinstance(formatted_answer, AgentAction) and formatted_answer.llm_response:
+                    # For tool use: append LLM response as assistant, observation as user
+                    self._append_message(formatted_answer.llm_response, role="assistant")
+                    if formatted_answer.result:
+                        self._append_message(
+                            f"Observation: {formatted_answer.result}", role="user"
+                        )
+                else:
+                    # For final answer or other cases: append text as assistant
+                    self._append_message(formatted_answer.text, role="assistant")
            except OutputParserError as e:  # noqa: PERF203
                self._printer.print(
                    content="Failed to parse LLM output. Retrying...",
--- a/lib/crewai/src/crewai/utilities/agent_utils.py
+++ b/lib/crewai/src/crewai/utilities/agent_utils.py
@@ -382,10 +382,21 @@ def handle_agent_action_core(

    Notes:
        - TODO: Remove messages parameter and its usage.
+        - The observation is appended to formatted_answer.text for logging/trace
+          purposes, but callers should use formatted_answer.llm_response for the
+          assistant message (without observation) and append the observation
+          separately as a user message to avoid LLM hallucination of observations.
    """
    if step_callback:
        step_callback(tool_result)

+    # Store the original LLM response before appending observation
+    # This is used by executors to correctly attribute messages:
+    # - llm_response goes as assistant message
+    # - observation goes as user message (to prevent LLM hallucination)
+    formatted_answer.llm_response = formatted_answer.text
+
+    # Append observation to text for logging/trace purposes
    formatted_answer.text += f"\nObservation: {tool_result.result}"
    formatted_answer.result = tool_result.result

--- a/lib/crewai/tests/agents/test_crew_agent_parser.py
+++ b/lib/crewai/tests/agents/test_crew_agent_parser.py
@@ -360,92 +360,3 @@ def test_integration_valid_and_invalid():


 # TODO: ADD TEST TO MAKE SURE ** REMOVAL DOESN'T MESS UP ANYTHING
-
-
-# Tests for Action: None handling (Issue #4186)
-def test_action_none_basic():
-    """Test that 'Action: None' is parsed as AgentFinish."""
-    text = "Thought: I cannot use any tool for this.\nAction: None"
-    result = parser.parse(text)
-    assert isinstance(result, AgentFinish)
-    assert "I cannot use any tool for this." in result.output
-
-
-def test_action_none_with_reason_in_parentheses():
-    """Test 'Action: None (reason)' format."""
-    text = "Thought: The tool is not available.\nAction: None (direct response required)"
-    result = parser.parse(text)
-    assert isinstance(result, AgentFinish)
-    assert "The tool is not available." in result.output
-
-
-def test_action_none_lowercase():
-    """Test that 'Action: none' (lowercase) is handled."""
-    text = "Thought: I should respond directly.\nAction: none"
-    result = parser.parse(text)
-    assert isinstance(result, AgentFinish)
-    assert "I should respond directly." in result.output
-
-
-def test_action_na():
-    """Test that 'Action: N/A' is handled."""
-    text = "Thought: No action needed here.\nAction: N/A"
-    result = parser.parse(text)
-    assert isinstance(result, AgentFinish)
-    assert "No action needed here." in result.output
-
-
-def test_action_na_lowercase():
-    """Test that 'Action: n/a' (lowercase) is handled."""
-    text = "Thought: This requires a direct answer.\nAction: n/a"
-    result = parser.parse(text)
-    assert isinstance(result, AgentFinish)
-    assert "This requires a direct answer." in result.output
-
-
-def test_action_none_with_dash_separator():
-    """Test 'Action: None - reason' format."""
-    text = "Thought: I need to provide a direct response.\nAction: None - direct response"
-    result = parser.parse(text)
-    assert isinstance(result, AgentFinish)
-    assert "I need to provide a direct response." in result.output
-
-
-def test_action_none_with_additional_content():
-    """Test 'Action: None' with additional content after."""
-    text = "Thought: I analyzed the request.\nAction: None\nHere is my direct response to your question."
-    result = parser.parse(text)
-    assert isinstance(result, AgentFinish)
-    assert "I analyzed the request." in result.output
-
-
-def test_action_no_action():
-    """Test that 'Action: no action' is handled."""
-    text = "Thought: I will respond without using tools.\nAction: no action"
-    result = parser.parse(text)
-    assert isinstance(result, AgentFinish)
-    assert "I will respond without using tools." in result.output
-
-
-def test_action_none_without_thought():
-    """Test 'Action: None' without a thought prefix."""
-    text = "Action: None"
-    result = parser.parse(text)
-    assert isinstance(result, AgentFinish)
-    assert result.output == "I cannot perform this action with the available tools."
-
-
-def test_action_none_preserves_original_text():
-    """Test that the original text is preserved in the result."""
-    text = "Thought: I cannot delegate this task.\nAction: None"
-    result = parser.parse(text)
-    assert isinstance(result, AgentFinish)
-    assert result.text == text
-
-
-def test_action_none_with_colon_separator():
-    """Test 'Action: None: reason' format."""
-    text = "Thought: Direct response needed.\nAction: None: providing direct answer"
-    result = parser.parse(text)
-    assert isinstance(result, AgentFinish)
-    assert "Direct response needed." in result.output
--- a/lib/crewai/tests/agents/test_observation_message_attribution.py
+++ b/lib/crewai/tests/agents/test_observation_message_attribution.py
@@ -0,0 +1,320 @@
+"""Tests for proper message attribution to prevent LLM observation hallucination.
+
+This module tests that tool observations are correctly attributed to user messages
+rather than assistant messages, which prevents the LLM from learning to hallucinate
+fake observations during tool calls.
+
+Related to GitHub issue #4181.
+"""
+
+from typing import Any
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from crewai.agents.crew_agent_executor import CrewAgentExecutor
+from crewai.agents.parser import AgentAction, AgentFinish
+from crewai.tools.tool_types import ToolResult
+from crewai.utilities.agent_utils import handle_agent_action_core
+
+
+@pytest.fixture
+def mock_llm() -> MagicMock:
+    """Create a mock LLM for testing."""
+    llm = MagicMock()
+    llm.supports_stop_words.return_value = True
+    llm.stop = []
+    return llm
+
+
+@pytest.fixture
+def mock_agent() -> MagicMock:
+    """Create a mock agent for testing."""
+    agent = MagicMock()
+    agent.role = "Test Agent"
+    agent.key = "test_agent_key"
+    agent.verbose = False
+    agent.id = "test_agent_id"
+    return agent
+
+
+@pytest.fixture
+def mock_task() -> MagicMock:
+    """Create a mock task for testing."""
+    task = MagicMock()
+    task.description = "Test task description"
+    return task
+
+
+@pytest.fixture
+def mock_crew() -> MagicMock:
+    """Create a mock crew for testing."""
+    crew = MagicMock()
+    crew.verbose = False
+    crew._train = False
+    return crew
+
+
+@pytest.fixture
+def mock_tools_handler() -> MagicMock:
+    """Create a mock tools handler."""
+    return MagicMock()
+
+
+@pytest.fixture
+def executor(
+    mock_llm: MagicMock,
+    mock_agent: MagicMock,
+    mock_task: MagicMock,
+    mock_crew: MagicMock,
+    mock_tools_handler: MagicMock,
+) -> CrewAgentExecutor:
+    """Create a CrewAgentExecutor instance for testing."""
+    return CrewAgentExecutor(
+        llm=mock_llm,
+        task=mock_task,
+        crew=mock_crew,
+        agent=mock_agent,
+        prompt={"prompt": "Test prompt {input} {tool_names} {tools}"},
+        max_iter=5,
+        tools=[],
+        tools_names="",
+        stop_words=["Observation:"],
+        tools_description="",
+        tools_handler=mock_tools_handler,
+    )
+
+
+class TestHandleAgentActionCore:
+    """Tests for handle_agent_action_core function."""
+
+    def test_stores_llm_response_before_observation(self) -> None:
+        """Test that llm_response is stored before observation is appended."""
+        original_text = "Thought: I need to search\nAction: search\nAction Input: query"
+        action = AgentAction(
+            thought="I need to search",
+            tool="search",
+            tool_input="query",
+            text=original_text,
+        )
+        tool_result = ToolResult(result="Search result: found data", result_as_answer=False)
+
+        result = handle_agent_action_core(
+            formatted_answer=action,
+            tool_result=tool_result,
+        )
+
+        assert isinstance(result, AgentAction)
+        assert result.llm_response == original_text
+        assert "Observation:" in result.text
+        assert result.result == "Search result: found data"
+
+    def test_text_contains_observation_for_logging(self) -> None:
+        """Test that text contains observation for logging purposes."""
+        action = AgentAction(
+            thought="Testing",
+            tool="test_tool",
+            tool_input="{}",
+            text="Thought: Testing\nAction: test_tool\nAction Input: {}",
+        )
+        tool_result = ToolResult(result="Tool output", result_as_answer=False)
+
+        result = handle_agent_action_core(
+            formatted_answer=action,
+            tool_result=tool_result,
+        )
+
+        assert isinstance(result, AgentAction)
+        assert "Observation: Tool output" in result.text
+
+    def test_result_as_answer_returns_agent_finish(self) -> None:
+        """Test that result_as_answer=True returns AgentFinish."""
+        action = AgentAction(
+            thought="Using tool",
+            tool="final_tool",
+            tool_input="{}",
+            text="Thought: Using tool\nAction: final_tool\nAction Input: {}",
+        )
+        tool_result = ToolResult(result="Final answer from tool", result_as_answer=True)
+
+        result = handle_agent_action_core(
+            formatted_answer=action,
+            tool_result=tool_result,
+        )
+
+        assert isinstance(result, AgentFinish)
+        assert result.output == "Final answer from tool"
+
+
+class TestCrewAgentExecutorMessageAttribution:
+    """Tests for proper message attribution in CrewAgentExecutor."""
+
+    def test_tool_observation_not_in_assistant_message(
+        self, executor: CrewAgentExecutor
+    ) -> None:
+        """Test that tool observations are not attributed to assistant messages.
+
+        This is the core fix for GitHub issue #4181 - observations should be
+        in user messages, not assistant messages, to prevent LLM hallucination.
+        """
+        call_count = 0
+
+        def mock_llm_response(*args: Any, **kwargs: Any) -> str:
+            nonlocal call_count
+            call_count += 1
+            if call_count == 1:
+                return (
+                    "Thought: I need to use a tool\n"
+                    "Action: test_tool\n"
+                    'Action Input: {"arg": "value"}'
+                )
+            return "Thought: I have the answer\nFinal Answer: Done"
+
+        with patch(
+            "crewai.agents.crew_agent_executor.get_llm_response",
+            side_effect=mock_llm_response,
+        ):
+            with patch(
+                "crewai.agents.crew_agent_executor.execute_tool_and_check_finality",
+                return_value=ToolResult(
+                    result="Tool executed successfully", result_as_answer=False
+                ),
+            ):
+                with patch.object(executor, "_show_logs"):
+                    result = executor._invoke_loop()
+
+        assert isinstance(result, AgentFinish)
+
+        assistant_messages = [
+            msg for msg in executor.messages if msg.get("role") == "assistant"
+        ]
+        user_messages = [msg for msg in executor.messages if msg.get("role") == "user"]
+
+        for msg in assistant_messages:
+            content = msg.get("content", "")
+            assert "Observation:" not in content, (
+                f"Assistant message should not contain 'Observation:'. "
+                f"Found: {content[:100]}..."
+            )
+
+        observation_in_user = any(
+            "Observation:" in msg.get("content", "") for msg in user_messages
+        )
+        assert observation_in_user, (
+            "Tool observation should be in a user message, not assistant message"
+        )
+
+    def test_llm_response_in_assistant_message(
+        self, executor: CrewAgentExecutor
+    ) -> None:
+        """Test that the LLM's actual response is in assistant messages."""
+        call_count = 0
+        llm_action_text = (
+            "Thought: I need to use a tool\n"
+            "Action: test_tool\n"
+            'Action Input: {"arg": "value"}'
+        )
+
+        def mock_llm_response(*args: Any, **kwargs: Any) -> str:
+            nonlocal call_count
+            call_count += 1
+            if call_count == 1:
+                return llm_action_text
+            return "Thought: I have the answer\nFinal Answer: Done"
+
+        with patch(
+            "crewai.agents.crew_agent_executor.get_llm_response",
+            side_effect=mock_llm_response,
+        ):
+            with patch(
+                "crewai.agents.crew_agent_executor.execute_tool_and_check_finality",
+                return_value=ToolResult(
+                    result="Tool executed successfully", result_as_answer=False
+                ),
+            ):
+                with patch.object(executor, "_show_logs"):
+                    executor._invoke_loop()
+
+        assistant_messages = [
+            msg for msg in executor.messages if msg.get("role") == "assistant"
+        ]
+
+        llm_response_found = any(
+            "Action: test_tool" in msg.get("content", "") for msg in assistant_messages
+        )
+        assert llm_response_found, (
+            "LLM's action response should be in an assistant message"
+        )
+
+    def test_message_order_after_tool_use(
+        self, executor: CrewAgentExecutor
+    ) -> None:
+        """Test that messages are in correct order: assistant (action) then user (observation)."""
+        call_count = 0
+
+        def mock_llm_response(*args: Any, **kwargs: Any) -> str:
+            nonlocal call_count
+            call_count += 1
+            if call_count == 1:
+                return (
+                    "Thought: I need to use a tool\n"
+                    "Action: test_tool\n"
+                    'Action Input: {"arg": "value"}'
+                )
+            return "Thought: I have the answer\nFinal Answer: Done"
+
+        with patch(
+            "crewai.agents.crew_agent_executor.get_llm_response",
+            side_effect=mock_llm_response,
+        ):
+            with patch(
+                "crewai.agents.crew_agent_executor.execute_tool_and_check_finality",
+                return_value=ToolResult(
+                    result="Tool executed successfully", result_as_answer=False
+                ),
+            ):
+                with patch.object(executor, "_show_logs"):
+                    executor._invoke_loop()
+
+        action_msg_idx = None
+        observation_msg_idx = None
+
+        for i, msg in enumerate(executor.messages):
+            content = msg.get("content", "")
+            if "Action: test_tool" in content and msg.get("role") == "assistant":
+                action_msg_idx = i
+            if "Observation:" in content and msg.get("role") == "user":
+                observation_msg_idx = i
+
+        assert action_msg_idx is not None, "Action message not found"
+        assert observation_msg_idx is not None, "Observation message not found"
+        assert observation_msg_idx == action_msg_idx + 1, (
+            f"Observation (user) should immediately follow action (assistant). "
+            f"Action at {action_msg_idx}, Observation at {observation_msg_idx}"
+        )
+
+
+class TestAgentActionLlmResponseField:
+    """Tests for the llm_response field on AgentAction."""
+
+    def test_agent_action_has_llm_response_field(self) -> None:
+        """Test that AgentAction has llm_response field."""
+        action = AgentAction(
+            thought="Test",
+            tool="test",
+            tool_input="{}",
+            text="Test text",
+        )
+        assert hasattr(action, "llm_response")
+        assert action.llm_response is None
+
+    def test_agent_action_llm_response_can_be_set(self) -> None:
+        """Test that llm_response can be set on AgentAction."""
+        action = AgentAction(
+            thought="Test",
+            tool="test",
+            tool_input="{}",
+            text="Test text",
+        )
+        action.llm_response = "Original LLM response"
+        assert action.llm_response == "Original LLM response"