From 5d757cb6262b572dc6289907f8bf9068fa70bc1e Mon Sep 17 00:00:00 2001
From: Greyson LaLonde <greyson.r.lalonde@gmail.com>
Date: Tue, 12 May 2026 00:26:31 +0800
Subject: [PATCH] fix(flow): log HITL pre-review and distillation failures, add
 learn_strict

---
 lib/crewai/src/crewai/flow/human_feedback.py  |  43 +++++-
 .../tests/test_human_feedback_decorator.py    | 128 ++++++++++++++++++
 2 files changed, 168 insertions(+), 3 deletions(-)

diff --git a/lib/crewai/src/crewai/flow/human_feedback.py b/lib/crewai/src/crewai/flow/human_feedback.py
index e6a51d9da..5278d0073 100644
--- a/lib/crewai/src/crewai/flow/human_feedback.py
+++ b/lib/crewai/src/crewai/flow/human_feedback.py
@@ -60,6 +60,7 @@ from collections.abc import Callable, Sequence
 from dataclasses import dataclass, field
 from datetime import datetime
 from functools import wraps
+import logging
 from typing import TYPE_CHECKING, Any, TypeVar
 
 from pydantic import BaseModel, Field
@@ -73,6 +74,8 @@ if TYPE_CHECKING:
     from crewai.llms.base_llm import BaseLLM
 
 
+logger = logging.getLogger(__name__)
+
 F = TypeVar("F", bound=Callable[..., Any])
 
 
@@ -188,6 +191,7 @@ class HumanFeedbackConfig:
     provider: HumanFeedbackProvider | None = None
     learn: bool = False
     learn_source: str = "hitl"
+    learn_strict: bool = False
 
 
 class HumanFeedbackMethod(FlowMethod[Any, Any]):
@@ -237,6 +241,7 @@ def human_feedback(
     provider: HumanFeedbackProvider | None = None,
     learn: bool = False,
     learn_source: str = "hitl",
+    learn_strict: bool = False,
 ) -> Callable[[F], F]:
     """Decorator for Flow methods that require human feedback.
 
@@ -275,6 +280,14 @@ def human_feedback(
             external systems like Slack, Teams, or webhooks. When the
             provider raises HumanFeedbackPending, the flow pauses and
             can be resumed later with Flow.resume().
+        learn: Enable HITL learning. Recall past lessons to pre-review
+            output before the human sees it, and distill new lessons
+            from feedback after.
+        learn_source: Memory source tag for stored/recalled lessons.
+        learn_strict: When True, re-raise exceptions from the pre-review
+            and distillation steps instead of degrading gracefully (raw
+            output shown to the human; no lessons stored). Default False;
+            failures are logged via ``logger.warning`` in either mode.
 
     Returns:
         A decorator function that wraps the method with human feedback
@@ -404,7 +417,19 @@ def human_feedback(
                 reviewed = llm_inst.call(messages)
                 return reviewed if isinstance(reviewed, str) else str(reviewed)
             except Exception:
-                return method_output  # fallback to raw output on any failure
+                if learn_strict:
+                    logger.warning(
+                        "HITL pre-review failed for %s; re-raising (learn_strict=True)",
+                        func.__name__,
+                        exc_info=True,
+                    )
+                    raise
+                logger.warning(
+                    "HITL pre-review failed for %s; falling back to raw output",
+                    func.__name__,
+                    exc_info=True,
+                )
+                return method_output
 
         def _distill_and_store_lessons(
             flow_instance: Flow[Any], method_output: Any, raw_feedback: str
@@ -446,8 +471,19 @@ def human_feedback(
 
                 if lessons:
                     mem.remember_many(lessons, source=learn_source)  # type: ignore[union-attr]
-            except Exception:  # noqa: S110
-                pass  # non-critical: don't fail the flow because lesson storage failed
+            except Exception:
+                if learn_strict:
+                    logger.warning(
+                        "HITL lesson distillation failed for %s; re-raising (learn_strict=True)",
+                        func.__name__,
+                        exc_info=True,
+                    )
+                    raise
+                logger.warning(
+                    "HITL lesson distillation failed for %s; no lessons stored",
+                    func.__name__,
+                    exc_info=True,
+                )
 
         # -- Core feedback helpers ------------------------------------
 
@@ -654,6 +690,7 @@ def human_feedback(
             provider=provider,
             learn=learn,
             learn_source=learn_source,
+            learn_strict=learn_strict,
         )
         wrapper.__is_flow_method__ = True
 
diff --git a/lib/crewai/tests/test_human_feedback_decorator.py b/lib/crewai/tests/test_human_feedback_decorator.py
index 68371eb0d..fef227f32 100644
--- a/lib/crewai/tests/test_human_feedback_decorator.py
+++ b/lib/crewai/tests/test_human_feedback_decorator.py
@@ -596,6 +596,134 @@ class TestHumanFeedbackLearn:
         # llm defaults to "gpt-4o-mini" at the function level
         assert config.llm == "gpt-4o-mini"
 
+    def test_pre_review_failure_logs_and_returns_raw_output(self, caplog):
+        """Pre-review LLM failure falls back to raw output AND logs a warning."""
+        from crewai.memory.types import MemoryMatch, MemoryRecord
+
+        class LearnFlow(Flow):
+            @start()
+            @human_feedback(message="Review:", llm="gpt-4o-mini", learn=True)
+            def produce(self):
+                return "raw draft"
+
+        flow = LearnFlow()
+        flow.memory = MagicMock()
+        flow.memory.recall.return_value = [  # recall yields one lesson
+            MemoryMatch(
+                record=MemoryRecord(content="some lesson", embedding=[]),
+                score=0.9,
+                match_reasons=["semantic"],
+            )
+        ]
+
+        captured: dict[str, Any] = {}
+
+        def capture_feedback(message, output, metadata=None, emit=None):
+            captured["shown_to_human"] = output
+            return ""  # empty -> no distillation path
+
+        with (
+            patch.object(flow, "_request_human_feedback", side_effect=capture_feedback),
+            patch("crewai.llm.LLM") as MockLLM,
+            caplog.at_level("WARNING", logger="crewai.flow.human_feedback"),
+        ):
+            mock_llm = MagicMock()
+            mock_llm.supports_function_calling.return_value = True
+            mock_llm.call.side_effect = RuntimeError("simulated pre-review failure")
+            MockLLM.return_value = mock_llm
+
+            flow.produce()  # learn_strict not set, so this must not raise
+
+        assert captured["shown_to_human"] == "raw draft"  # unreviewed output
+        assert any(
+            "HITL pre-review failed" in rec.message
+            and rec.levelname == "WARNING"
+            and rec.exc_info is not None
+            for rec in caplog.records
+        )
+
+    def test_pre_review_failure_strict_reraises(self):
+        """When learn_strict=True, pre-review failures propagate instead of falling back."""
+        from crewai.memory.types import MemoryMatch, MemoryRecord
+
+        class LearnFlow(Flow):
+            @start()
+            @human_feedback(
+                message="Review:",
+                llm="gpt-4o-mini",
+                learn=True,
+                learn_strict=True,  # the flag under test
+            )
+            def produce(self):
+                return "raw draft"
+
+        flow = LearnFlow()
+        flow.memory = MagicMock()
+        flow.memory.recall.return_value = [  # recall yields one lesson
+            MemoryMatch(
+                record=MemoryRecord(content="some lesson", embedding=[]),
+                score=0.9,
+                match_reasons=["semantic"],
+            )
+        ]
+
+        with (
+            patch.object(flow, "_request_human_feedback", return_value=""),
+            patch("crewai.llm.LLM") as MockLLM,
+        ):
+            mock_llm = MagicMock()
+            mock_llm.supports_function_calling.return_value = True
+            mock_llm.call.side_effect = RuntimeError("simulated pre-review failure")
+            MockLLM.return_value = mock_llm
+
+            with pytest.raises(RuntimeError, match="simulated pre-review failure"):
+                flow.produce()  # the mocked LLM error must surface here
+
+    def test_distillation_failure_logs_and_does_not_block_flow(self, caplog):
+        """Distillation LLM failure logs a warning but does not break the flow."""
+
+        class LearnFlow(Flow):
+            @start()
+            @human_feedback(message="Review:", llm="gpt-4o-mini", learn=True)
+            def produce(self):
+                return "raw draft"
+
+        flow = LearnFlow()
+        flow.memory = MagicMock()
+        flow.memory.recall.return_value = []  # no pre-review path
+
+        with (
+            patch.object(
+                flow, "_request_human_feedback", return_value="please add citations"
+            ),
+            patch("crewai.llm.LLM") as MockLLM,
+            caplog.at_level("WARNING", logger="crewai.flow.human_feedback"),
+        ):
+            mock_llm = MagicMock()
+            mock_llm.supports_function_calling.return_value = True
+            mock_llm.call.side_effect = RuntimeError("simulated distill failure")
+            MockLLM.return_value = mock_llm
+
+            flow.produce()  # must not raise
+
+        flow.memory.remember_many.assert_not_called()  # nothing was stored
+        assert any(
+            "HITL lesson distillation failed" in rec.message
+            and rec.levelname == "WARNING"
+            for rec in caplog.records
+        )
+
+    def test_learn_strict_config_propagates(self):
+        """learn_strict passed to the decorator is exposed on its config."""
+
+        @human_feedback(message="Review:", learn=True, learn_strict=True)
+        def test_method(self):
+            return "output"
+
+        config = test_method.__human_feedback_config__
+        assert config is not None
+        assert config.learn_strict is True
+
 
 class TestHumanFeedbackFinalOutputPreservation:
     """Tests for preserving method return value as flow's final output when @human_feedback with emit is terminal.