fix: update observation handling in PlannerObserver for LLM errors

- Modified the error handling in the PlannerObserver to default to a conservative replan when an LLM call fails.
- Updated the return values to indicate that the step was not completed successfully and that a full replan is needed.
- Added a new test to verify the behavior of the observer when an LLM error occurs, ensuring the correct replan logic is triggered.
This commit is contained in:
lorenzejay
2026-02-25 13:44:50 -08:00
parent 687d6abdaa
commit 76f329a025
2 changed files with 66 additions and 4 deletions

View File

@@ -167,7 +167,9 @@ class PlannerObserver:
return observation
except Exception as e:
logger.warning(f"Observation LLM call failed: {e}. Defaulting to continue.")
logger.warning(
f"Observation LLM call failed: {e}. Defaulting to conservative replan."
)
crewai_event_bus.emit(
self.agent,
@@ -182,9 +184,11 @@ class PlannerObserver:
)
return StepObservation(
step_completed_successfully=True,
step_completed_successfully=False,
key_information_learned="",
remaining_plan_still_valid=True,
remaining_plan_still_valid=False,
needs_full_replan=True,
replan_reason="Observer failed to evaluate step result safely",
)
def apply_refinements(
@@ -273,4 +277,3 @@ class PlannerObserver:
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt},
]

View File

@@ -11,6 +11,7 @@ from unittest.mock import AsyncMock, Mock, patch
import pytest
from crewai.agents.step_executor import StepExecutor
from crewai.agents.planner_observer import PlannerObserver
from crewai.experimental.agent_executor import (
AgentReActState,
AgentExecutor,
@@ -848,6 +849,64 @@ class TestNativeToolExecution:
assert len(tool_messages) == 1
assert tool_messages[0]["tool_call_id"] == "call_1"
def test_check_native_todo_completion_requires_expected_tool_match(
    self, mock_dependencies
):
    """Check how the executed tool affects completion of a running todo.

    A single running todo is walked through three scenarios, and the value
    returned by ``check_native_todo_completion`` is asserted for each:
    a different tool ran, the expected tool ran, and the todo names no tool.
    """
    from crewai.utilities.planning_types import TodoList

    agent_executor = AgentExecutor(**mock_dependencies)
    active_todo = TodoItem(
        step_number=1,
        description="Use the expected tool",
        tool_to_use="expected_tool",
        status="running",
    )
    agent_executor.state.todos = TodoList(items=[active_todo])

    # Scenario 1: a tool other than the one the todo names was executed.
    agent_executor.state.last_native_tools_executed = ["other_tool"]
    mismatch_outcome = agent_executor.check_native_todo_completion()
    assert mismatch_outcome == "todo_not_satisfied"

    # Scenario 2: the tool the todo names was executed.
    agent_executor.state.last_native_tools_executed = ["expected_tool"]
    match_outcome = agent_executor.check_native_todo_completion()
    assert match_outcome == "todo_satisfied"

    # Scenario 3: the todo names no tool, so an arbitrary tool run
    # is asserted not to satisfy it.
    active_todo.tool_to_use = None
    agent_executor.state.last_native_tools_executed = ["any_tool"]
    no_tool_outcome = agent_executor.check_native_todo_completion()
    assert no_tool_outcome == "todo_not_satisfied"
class TestPlannerObserver:
    """Tests for PlannerObserver when the observation LLM call fails."""

    def test_observe_fallback_is_conservative_on_llm_error(self):
        """An LLM error during observe() must yield a full-replan observation."""
        # Any call on this LLM raises, which forces the observer's error path.
        failing_llm = Mock()
        failing_llm.call.side_effect = RuntimeError("llm unavailable")

        stub_agent = Mock(
            role="Observer Test Agent",
            llm=failing_llm,
            planning_config=None,
        )
        stub_task = Mock(
            description="Test task",
            expected_output="Expected result",
        )

        planner_observer = PlannerObserver(agent=stub_agent, task=stub_task)
        step = TodoItem(
            step_number=1,
            description="Do something",
            status="running",
        )

        fallback = planner_observer.observe(
            completed_step=step,
            result="Error: tool timeout",
            all_completed=[],
            remaining_todos=[],
        )

        # The fallback must not claim success or keep the current plan.
        assert fallback.step_completed_successfully is False
        assert fallback.remaining_plan_still_valid is False
        assert fallback.needs_full_replan is True
        expected_reason = "Observer failed to evaluate step result safely"
        assert fallback.replan_reason == expected_reason
class TestAgentExecutorPlanning:
"""Test planning functionality in AgentExecutor with real agent kickoff."""