fix: update observation handling in PlannerObserver for LLM errors

- Modified the error handling in PlannerObserver to default to a conservative replan when the observation LLM call fails.
- Updated the fallback return values to indicate that the step was not completed successfully and that a full replan is needed.
- Added a new test to verify the observer's behavior when an LLM error occurs, ensuring the correct replan logic is triggered.
2026-04-30 14:52:36 +00:00 · 2026-02-25 13:44:50 -08:00
parent 687d6abdaa
commit 76f329a025
2 changed files with 66 additions and 4 deletions
--- a/lib/crewai/src/crewai/agents/planner_observer.py
+++ b/lib/crewai/src/crewai/agents/planner_observer.py
@@ -167,7 +167,9 @@ class PlannerObserver:
            return observation

        except Exception as e:
-            logger.warning(f"Observation LLM call failed: {e}. Defaulting to continue.")
+            logger.warning(
+                f"Observation LLM call failed: {e}. Defaulting to conservative replan."
+            )

            crewai_event_bus.emit(
                self.agent,
@@ -182,9 +184,11 @@ class PlannerObserver:
            )

            return StepObservation(
-                step_completed_successfully=True,
+                step_completed_successfully=False,
                key_information_learned="",
-                remaining_plan_still_valid=True,
+                remaining_plan_still_valid=False,
+                needs_full_replan=True,
+                replan_reason="Observer failed to evaluate step result safely",
            )

    def apply_refinements(
@@ -273,4 +277,3 @@ class PlannerObserver:
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]
-
--- a/lib/crewai/tests/agents/test_agent_executor.py
+++ b/lib/crewai/tests/agents/test_agent_executor.py
@@ -11,6 +11,7 @@ from unittest.mock import AsyncMock, Mock, patch
 import pytest

 from crewai.agents.step_executor import StepExecutor
+from crewai.agents.planner_observer import PlannerObserver
 from crewai.experimental.agent_executor import (
    AgentReActState,
    AgentExecutor,
@@ -848,6 +849,64 @@ class TestNativeToolExecution:
        assert len(tool_messages) == 1
        assert tool_messages[0]["tool_call_id"] == "call_1"

+    def test_check_native_todo_completion_requires_expected_tool_match(
+        self, mock_dependencies
+    ):
+        from crewai.utilities.planning_types import TodoList
+
+        executor = AgentExecutor(**mock_dependencies)
+        running = TodoItem(
+            step_number=1,
+            description="Use the expected tool",
+            tool_to_use="expected_tool",
+            status="running",
+        )
+        executor.state.todos = TodoList(items=[running])
+
+        executor.state.last_native_tools_executed = ["other_tool"]
+        assert executor.check_native_todo_completion() == "todo_not_satisfied"
+
+        executor.state.last_native_tools_executed = ["expected_tool"]
+        assert executor.check_native_todo_completion() == "todo_satisfied"
+
+        running.tool_to_use = None
+        executor.state.last_native_tools_executed = ["any_tool"]
+        assert executor.check_native_todo_completion() == "todo_not_satisfied"
+
+
+class TestPlannerObserver:
+    def test_observe_fallback_is_conservative_on_llm_error(self):
+        llm = Mock()
+        llm.call.side_effect = RuntimeError("llm unavailable")
+
+        agent = Mock()
+        agent.role = "Observer Test Agent"
+        agent.llm = llm
+        agent.planning_config = None
+
+        task = Mock()
+        task.description = "Test task"
+        task.expected_output = "Expected result"
+
+        observer = PlannerObserver(agent=agent, task=task)
+
+        completed_step = TodoItem(
+            step_number=1,
+            description="Do something",
+            status="running",
+        )
+        observation = observer.observe(
+            completed_step=completed_step,
+            result="Error: tool timeout",
+            all_completed=[],
+            remaining_todos=[],
+        )
+
+        assert observation.step_completed_successfully is False
+        assert observation.remaining_plan_still_valid is False
+        assert observation.needs_full_replan is True
+        assert observation.replan_reason == "Observer failed to evaluate step result safely"
+

 class TestAgentExecutorPlanning:
    """Test planning functionality in AgentExecutor with real agent kickoff."""