Lorenze/metrics for human feedback flows (#4188)
Some checks failed
CodeQL Advanced / Analyze (actions) (push) Has been cancelled
CodeQL Advanced / Analyze (python) (push) Has been cancelled
Notify Downstream / notify-downstream (push) Has been cancelled
Mark stale issues and pull requests / stale (push) Has been cancelled

* measuring human feedback feat

* add some tests
This commit is contained in:
Lorenze Jay
2026-01-06 16:12:34 -08:00
committed by GitHub
parent b787d7e591
commit 8945457883
4 changed files with 202 additions and 3 deletions

View File

@@ -25,6 +25,8 @@ from crewai.events.types.flow_events import (
FlowCreatedEvent,
FlowFinishedEvent,
FlowStartedEvent,
HumanFeedbackReceivedEvent,
HumanFeedbackRequestedEvent,
MethodExecutionFailedEvent,
MethodExecutionFinishedEvent,
MethodExecutionStartedEvent,
@@ -45,6 +47,7 @@ from crewai.events.types.tool_usage_events import (
ToolUsageFinishedEvent,
)
from crewai.flow.flow import Flow, listen, start
from crewai.flow.human_feedback import human_feedback
from crewai.llm import LLM
from crewai.task import Task
from crewai.tools.base_tool import BaseTool
@@ -1273,3 +1276,135 @@ def test_llm_emits_event_with_lite_agent():
assert set(all_agent_roles) == {agent.role}
assert set(all_agent_id) == {str(agent.id)}
# ----------- HUMAN FEEDBACK EVENTS -----------
@patch("builtins.input", return_value="looks good")
@patch("builtins.print")
def test_human_feedback_emits_requested_and_received_events(mock_print, mock_input):
"""Test that @human_feedback decorator emits HumanFeedbackRequested and Received events."""
requested_events = []
received_events = []
events_received = threading.Event()
@crewai_event_bus.on(HumanFeedbackRequestedEvent)
def handle_requested(source, event):
requested_events.append(event)
@crewai_event_bus.on(HumanFeedbackReceivedEvent)
def handle_received(source, event):
received_events.append(event)
events_received.set()
class TestFlow(Flow):
@start()
@human_feedback(
message="Review:",
emit=["approved", "rejected"],
llm="gpt-4o-mini",
)
def review(self):
return "test content"
flow = TestFlow()
with patch.object(flow, "_collapse_to_outcome", return_value="approved"):
flow.kickoff()
assert events_received.wait(timeout=5), (
"Timeout waiting for human feedback events"
)
assert len(requested_events) == 1
assert requested_events[0].type == "human_feedback_requested"
assert requested_events[0].emit == ["approved", "rejected"]
assert requested_events[0].message == "Review:"
assert requested_events[0].output == "test content"
assert len(received_events) == 1
assert received_events[0].type == "human_feedback_received"
assert received_events[0].feedback == "looks good"
assert received_events[0].outcome is None
assert flow.last_human_feedback is not None
assert flow.last_human_feedback.outcome == "approved"
@patch("builtins.input", return_value="feedback text")
@patch("builtins.print")
def test_human_feedback_without_routing_emits_events(mock_print, mock_input):
"""Test that @human_feedback without emit still emits events."""
requested_events = []
received_events = []
events_received = threading.Event()
@crewai_event_bus.on(HumanFeedbackRequestedEvent)
def handle_requested(source, event):
requested_events.append(event)
@crewai_event_bus.on(HumanFeedbackReceivedEvent)
def handle_received(source, event):
received_events.append(event)
events_received.set()
class SimpleFlow(Flow):
@start()
@human_feedback(message="Please review:")
def review(self):
return "content to review"
flow = SimpleFlow()
flow.kickoff()
assert events_received.wait(timeout=5), (
"Timeout waiting for human feedback events"
)
assert len(requested_events) == 1
assert requested_events[0].emit is None
assert len(received_events) == 1
assert received_events[0].feedback == "feedback text"
assert received_events[0].outcome is None
@patch("builtins.input", return_value="")
@patch("builtins.print")
def test_human_feedback_empty_feedback_emits_events(mock_print, mock_input):
"""Test that empty feedback (skipped) still emits events correctly."""
received_events = []
events_received = threading.Event()
@crewai_event_bus.on(HumanFeedbackReceivedEvent)
def handle_received(source, event):
received_events.append(event)
events_received.set()
class SkipFlow(Flow):
@start()
@human_feedback(
message="Review:",
emit=["approved", "rejected"],
llm="gpt-4o-mini",
default_outcome="rejected",
)
def review(self):
return "content"
flow = SkipFlow()
flow.kickoff()
assert events_received.wait(timeout=5), (
"Timeout waiting for human feedback events"
)
assert len(received_events) == 1
assert received_events[0].feedback == ""
assert received_events[0].outcome is None
assert flow.last_human_feedback is not None
assert flow.last_human_feedback.outcome == "rejected"