diff --git a/lib/crewai/src/crewai/flow/flow.py b/lib/crewai/src/crewai/flow/flow.py index f1e75e617..0624f7bec 100644 --- a/lib/crewai/src/crewai/flow/flow.py +++ b/lib/crewai/src/crewai/flow/flow.py @@ -883,6 +883,9 @@ class Flow(Generic[T], metaclass=FlowMeta): self.human_feedback_history: list[HumanFeedbackResult] = [] self.last_human_feedback: HumanFeedbackResult | None = None self._pending_feedback_context: PendingFeedbackContext | None = None + # Per-method stash for real @human_feedback output (keyed by method name) + # Used to decouple routing outcome from method return value when emit is set + self._human_feedback_method_outputs: dict[str, Any] = {} self.suppress_flow_events: bool = suppress_flow_events # User input history (for self.ask()) @@ -2290,6 +2293,17 @@ class Flow(Generic[T], metaclass=FlowMeta): result = await result self._method_outputs.append(result) + + # For @human_feedback methods with emit, the result is the collapsed outcome + # (e.g., "approved") used for routing. But we want the actual method output + # to be the stored result (for final flow output). Replace the last entry + # if a stashed output exists. Dict-based stash is concurrency-safe and + # handles None return values (presence in dict = stashed, not value). + if method_name in self._human_feedback_method_outputs: + self._method_outputs[-1] = self._human_feedback_method_outputs.pop( + method_name + ) + self._method_execution_counts[method_name] = ( self._method_execution_counts.get(method_name, 0) + 1 ) diff --git a/lib/crewai/src/crewai/flow/human_feedback.py b/lib/crewai/src/crewai/flow/human_feedback.py index e43fc3337..5fedbd3a2 100644 --- a/lib/crewai/src/crewai/flow/human_feedback.py +++ b/lib/crewai/src/crewai/flow/human_feedback.py @@ -591,6 +591,13 @@ def human_feedback( ): _distill_and_store_lessons(self, method_output, raw_feedback) + # Stash the real method output for final flow result when emit is set + # (result is the collapsed outcome string for routing, but we want to + # preserve the actual method output as the flow's final result) + # Uses per-method dict for concurrency safety and to handle None returns + if emit: + self._human_feedback_method_outputs[func.__name__] = method_output + return result wrapper: Any = async_wrapper @@ -615,6 +622,13 @@ def human_feedback( ): _distill_and_store_lessons(self, method_output, raw_feedback) + # Stash the real method output for final flow result when emit is set + # (result is the collapsed outcome string for routing, but we want to + # preserve the actual method output as the flow's final result) + # Uses per-method dict for concurrency safety and to handle None returns + if emit: + self._human_feedback_method_outputs[func.__name__] = method_output + return result wrapper = sync_wrapper diff --git a/lib/crewai/tests/test_human_feedback_decorator.py b/lib/crewai/tests/test_human_feedback_decorator.py index 23b3d723b..68371eb0d 100644 --- a/lib/crewai/tests/test_human_feedback_decorator.py +++ b/lib/crewai/tests/test_human_feedback_decorator.py @@ -246,7 +246,7 @@ class TestHumanFeedbackExecution: @patch("builtins.input", return_value="") @patch("builtins.print") def test_empty_feedback_with_default_outcome(self, mock_print, mock_input): - """Test empty feedback uses default_outcome.""" + """Test empty feedback uses default_outcome for routing, but flow returns method output.""" class TestFlow(Flow): @start() @@ -264,14 +264,16 @@ class TestHumanFeedbackExecution: with patch.object(flow, "_request_human_feedback", return_value=""): result = flow.kickoff() - assert result == "needs_work" + # Flow result is the method's return value, NOT the collapsed outcome + assert result == "Content" assert flow.last_human_feedback is not None + # But the outcome is still correctly set for routing purposes assert flow.last_human_feedback.outcome == "needs_work" @patch("builtins.input", return_value="Approved!") @patch("builtins.print") def test_feedback_collapsing(self, mock_print, mock_input): - """Test that feedback is collapsed to an outcome.""" + """Test that feedback is collapsed to an outcome for routing, but flow returns method output.""" class TestFlow(Flow): @start() @@ -291,8 +293,10 @@ class TestHumanFeedbackExecution: ): result = flow.kickoff() - assert result == "approved" + # Flow result is the method's return value, NOT the collapsed outcome + assert result == "Content" assert flow.last_human_feedback is not None + # But the outcome is still correctly set for routing purposes assert flow.last_human_feedback.outcome == "approved" @@ -591,3 +595,162 @@ class TestHumanFeedbackLearn: assert config.learn is True # llm defaults to "gpt-4o-mini" at the function level assert config.llm == "gpt-4o-mini" + + +class TestHumanFeedbackFinalOutputPreservation: + """Tests for preserving method return value as flow's final output when @human_feedback with emit is terminal. + + This addresses the bug where the flow's final output was the collapsed outcome string (e.g., 'approved') + instead of the method's actual return value when a @human_feedback method with emit is the final method. + """ + + @patch("builtins.input", return_value="Looks good!") + @patch("builtins.print") + def test_final_output_is_method_return_not_collapsed_outcome( + self, mock_print, mock_input + ): + """When @human_feedback with emit is the final method, flow output is the method's return value.""" + + class FinalHumanFeedbackFlow(Flow): + @start() + @human_feedback( + message="Review this content:", + emit=["approved", "rejected"], + llm="gpt-4o-mini", + ) + def generate_and_review(self): + # This dict should be the final output, NOT the string 'approved' + return {"title": "My Article", "content": "Article content here", "status": "ready"} + + flow = FinalHumanFeedbackFlow() + + with ( + patch.object(flow, "_request_human_feedback", return_value="Looks great, approved!"), + patch.object(flow, "_collapse_to_outcome", return_value="approved"), + ): + result = flow.kickoff() + + # The final output should be the actual method return value, not the collapsed outcome + assert isinstance(result, dict), f"Expected dict, got {type(result).__name__}: {result}" + assert result == {"title": "My Article", "content": "Article content here", "status": "ready"} + # But the outcome should still be tracked in last_human_feedback + assert flow.last_human_feedback is not None + assert flow.last_human_feedback.outcome == "approved" + + @patch("builtins.input", return_value="approved") + @patch("builtins.print") + def test_routing_still_works_with_downstream_listener(self, mock_print, mock_input): + """When @human_feedback has a downstream listener, routing still triggers the listener.""" + publish_called = [] + + class RoutingFlow(Flow): + @start() + @human_feedback( + message="Review:", + emit=["approved", "rejected"], + llm="gpt-4o-mini", + ) + def review(self): + return {"content": "original content"} + + @listen("approved") + def publish(self): + publish_called.append(True) + return {"published": True, "timestamp": "2024-01-01"} + + flow = RoutingFlow() + + with ( + patch.object(flow, "_request_human_feedback", return_value="LGTM"), + patch.object(flow, "_collapse_to_outcome", return_value="approved"), + ): + result = flow.kickoff() + + # The downstream listener should have been triggered + assert len(publish_called) == 1, "publish() should have been called" + # The final output should be from the listener, not the human_feedback method + assert result == {"published": True, "timestamp": "2024-01-01"} + + @patch("builtins.input", return_value="") + @patch("builtins.print") + @pytest.mark.asyncio + async def test_async_human_feedback_final_output_preserved(self, mock_print, mock_input): + """Async @human_feedback methods also preserve the real return value.""" + + class AsyncFinalFlow(Flow): + @start() + @human_feedback( + message="Review async content:", + emit=["approved", "rejected"], + llm="gpt-4o-mini", + default_outcome="approved", + ) + async def async_generate(self): + return {"async_data": "value", "computed": 42} + + flow = AsyncFinalFlow() + + with ( + patch.object(flow, "_request_human_feedback", return_value=""), + ): + result = await flow.kickoff_async() + + # The final output should be the dict, not "approved" + assert isinstance(result, dict), f"Expected dict, got {type(result).__name__}: {result}" + assert result == {"async_data": "value", "computed": 42} + assert flow.last_human_feedback.outcome == "approved" + + @patch("builtins.input", return_value="feedback") + @patch("builtins.print") + def test_method_outputs_contains_real_output(self, mock_print, mock_input): + """The _method_outputs list should contain the real method output, not the collapsed outcome.""" + + class OutputTrackingFlow(Flow): + @start() + @human_feedback( + message="Review:", + emit=["approved", "rejected"], + llm="gpt-4o-mini", + ) + def generate(self): + return {"data": "real output"} + + flow = OutputTrackingFlow() + + with ( + patch.object(flow, "_request_human_feedback", return_value="approved"), + patch.object(flow, "_collapse_to_outcome", return_value="approved"), + ): + flow.kickoff() + + # _method_outputs should contain the real output + assert len(flow._method_outputs) == 1 + assert flow._method_outputs[0] == {"data": "real output"} + + @patch("builtins.input", return_value="looks good") + @patch("builtins.print") + def test_none_return_value_is_preserved(self, mock_print, mock_input): + """A method returning None should preserve None as flow output, not the outcome string.""" + + class NoneReturnFlow(Flow): + @start() + @human_feedback( + message="Review:", + emit=["approved", "rejected"], + llm="gpt-4o-mini", + ) + def process(self): + # Method does work but returns None (implicit) + pass + + flow = NoneReturnFlow() + + with ( + patch.object(flow, "_request_human_feedback", return_value=""), + patch.object(flow, "_collapse_to_outcome", return_value="approved"), + ): + result = flow.kickoff() + + # Final output should be None (the method's real return), not "approved" + assert result is None, f"Expected None, got {result!r}" + assert flow.last_human_feedback.outcome == "approved" diff --git a/lib/crewai/tests/test_human_feedback_integration.py b/lib/crewai/tests/test_human_feedback_integration.py index 407c44bd2..b2e66797b 100644 --- a/lib/crewai/tests/test_human_feedback_integration.py +++ b/lib/crewai/tests/test_human_feedback_integration.py @@ -708,7 +708,7 @@ class TestEdgeCases: @patch("builtins.input", return_value="") @patch("builtins.print") def test_empty_feedback_first_outcome_fallback(self, mock_print, mock_input): - """Test that empty feedback without default uses first outcome.""" + """Test that empty feedback without default uses first outcome for routing, but returns method output.""" class FallbackFlow(Flow): @start() @@ -726,12 +726,15 @@ class TestEdgeCases: with patch.object(flow, "_request_human_feedback", return_value=""): result = flow.kickoff() - assert result == "first" # Falls back to first outcome + # Flow result is the method's return value, NOT the collapsed outcome + assert result == "content" + # But outcome is still set to first for routing purposes + assert flow.last_human_feedback.outcome == "first" @patch("builtins.input", return_value="whitespace only ") @patch("builtins.print") def test_whitespace_only_feedback_treated_as_empty(self, mock_print, mock_input): - """Test that whitespace-only feedback is treated as empty.""" + """Test that whitespace-only feedback is treated as empty for routing, but returns method output.""" class WhitespaceFlow(Flow): @start() @@ -749,7 +752,10 @@ class TestEdgeCases: with patch.object(flow, "_request_human_feedback", return_value=" "): result = flow.kickoff() - assert result == "reject" # Uses default because feedback is empty after strip + # Flow result is the method's return value, NOT the collapsed outcome + assert result == "content" + # But outcome is set to default because feedback is empty after strip + assert flow.last_human_feedback.outcome == "reject" @patch("builtins.input", return_value="feedback") @patch("builtins.print")