Brandon/cre 509 hitl multiple rounds of followup (#1702)

* v1 of HITL working
* Drop print statements
* Make HITL code more robust; still needs to be refactored
* Refactor and make messages clearer
* Fix type issue
* Fix tests
* Fix test again
* Drop extra print
2026-05-03 08:12:39 +00:00 · 2024-12-05 10:14:04 -05:00
parent 06d02c0f62
commit 03abf53ba9
6 changed files with 403 additions and 89 deletions
--- a/src/crewai/agents/agent_builder/base_agent_executor_mixin.py
+++ b/src/crewai/agents/agent_builder/base_agent_executor_mixin.py
@@ -3,16 +3,15 @@ from typing import TYPE_CHECKING, Optional

 from crewai.memory.entity.entity_memory_item import EntityMemoryItem
 from crewai.memory.long_term.long_term_memory_item import LongTermMemoryItem
+from crewai.utilities import I18N
 from crewai.utilities.converter import ConverterError
 from crewai.utilities.evaluators.task_evaluator import TaskEvaluator
-from crewai.utilities import I18N
 from crewai.utilities.printer import Printer

-
 if TYPE_CHECKING:
+    from crewai.agents.agent_builder.base_agent import BaseAgent
    from crewai.crew import Crew
    from crewai.task import Task
-    from crewai.agents.agent_builder.base_agent import BaseAgent


 class CrewAgentExecutorMixin:
@@ -100,14 +99,19 @@ class CrewAgentExecutorMixin:
                print(f"Failed to add to long term memory: {e}")
                pass

-    def _ask_human_input(self, final_answer: dict) -> str:
+    def _ask_human_input(self, final_answer: str) -> str:
        """Prompt human input for final decision making."""
        self._printer.print(
            content=f"\033[1m\033[95m ## Final Result:\033[00m \033[92m{final_answer}\033[00m"
        )

        self._printer.print(
-            content="\n\n=====\n## Please provide feedback on the Final Result and the Agent's actions:",
+            content=(
+                "\n\n=====\n"
+                "## Please provide feedback on the Final Result and the Agent's actions. "
+                "Respond with 'looks good' or a similar phrase when you're satisfied.\n"
+                "=====\n"
+            ),
            color="bold_yellow",
        )
        return input()
--- a/src/crewai/agents/crew_agent_executor.py
+++ b/src/crewai/agents/crew_agent_executor.py
@@ -16,7 +16,7 @@ from crewai.agents.tools_handler import ToolsHandler
 from crewai.tools.base_tool import BaseTool
 from crewai.tools.tool_usage import ToolUsage, ToolUsageErrorException
 from crewai.utilities import I18N, Printer
-from crewai.utilities.constants import TRAINING_DATA_FILE
+from crewai.utilities.constants import MAX_LLM_RETRY, TRAINING_DATA_FILE
 from crewai.utilities.exceptions.context_window_exceeding_exception import (
    LLMContextLengthExceededException,
 )
@@ -90,7 +90,6 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
        if "system" in self.prompt:
            system_prompt = self._format_prompt(self.prompt.get("system", ""), inputs)
            user_prompt = self._format_prompt(self.prompt.get("user", ""), inputs)
-
            self.messages.append(self._format_msg(system_prompt, role="system"))
            self.messages.append(self._format_msg(user_prompt))
        else:
@@ -103,17 +102,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
        formatted_answer = self._invoke_loop()

        if self.ask_for_human_input:
-            human_feedback = self._ask_human_input(formatted_answer.output)
-            if self.crew and self.crew._train:
-                self._handle_crew_training_output(formatted_answer, human_feedback)
+            formatted_answer = self._handle_human_feedback(formatted_answer)

-            # Making sure we only ask for it once, so disabling for the next thought loop
-            self.ask_for_human_input = False
-            self.messages.append(self._format_msg(f"Feedback: {human_feedback}"))
-            formatted_answer = self._invoke_loop()
-
-            if self.crew and self.crew._train:
-                self._handle_crew_training_output(formatted_answer)
        self._create_short_term_memory(formatted_answer)
        self._create_long_term_memory(formatted_answer)
        return {"output": formatted_answer.output}
@@ -326,16 +316,14 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):

    def _handle_context_length(self) -> None:
        if self.respect_context_window:
-            self._logger.log(
-                "debug",
-                "Context length exceeded. Summarizing content to fit the model context window.",
+            self._printer.print(
+                content="Context length exceeded. Summarizing content to fit the model context window.",
                color="yellow",
            )
            self._summarize_messages()
        else:
-            self._logger.log(
-                "debug",
-                "Context length exceeded. Consider using smaller text or RAG tools from crewai_tools.",
+            self._printer.print(
+                content="Context length exceeded. Consider using smaller text or RAG tools from crewai_tools.",
                color="red",
            )
            raise SystemExit(
@@ -362,15 +350,13 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                    ] = result.output
                    training_handler.save(training_data)
                else:
-                    self._logger.log(
-                        "error",
-                        "Invalid train iteration type or agent_id not in training data.",
+                    self._printer.print(
+                        content="Invalid train iteration type or agent_id not in training data.",
                        color="red",
                    )
            else:
-                self._logger.log(
-                    "error",
-                    "Crew is None or does not have _train_iteration attribute.",
+                self._printer.print(
+                    content="Crew is None or does not have _train_iteration attribute.",
                    color="red",
                )

@@ -388,15 +374,13 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                        train_iteration, agent_id, training_data
                    )
                else:
-                    self._logger.log(
-                        "error",
-                        "Invalid train iteration type. Expected int.",
+                    self._printer.print(
+                        content="Invalid train iteration type. Expected int.",
                        color="red",
                    )
            else:
-                self._logger.log(
-                    "error",
-                    "Crew is None or does not have _train_iteration attribute.",
+                self._printer.print(
+                    content="Crew is None or does not have _train_iteration attribute.",
                    color="red",
                )

@@ -412,3 +396,82 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
    def _format_msg(self, prompt: str, role: str = "user") -> Dict[str, str]:
        prompt = prompt.rstrip()
        return {"role": role, "content": prompt}
+
+    def _handle_human_feedback(self, formatted_answer: AgentFinish) -> AgentFinish:
+        """
+        Handles the human feedback loop, allowing the user to provide feedback
+        on the agent's output and determining if additional iterations are needed.
+
+        Parameters:
+            formatted_answer (AgentFinish): The initial output from the agent.
+
+        Returns:
+            AgentFinish: The final output after incorporating human feedback.
+        """
+        while self.ask_for_human_input:
+            human_feedback = self._ask_human_input(formatted_answer.output)
+            print("Human feedback: ", human_feedback)
+
+            if self.crew and self.crew._train:
+                self._handle_crew_training_output(formatted_answer, human_feedback)
+
+            # Make an LLM call to verify if additional changes are requested based on human feedback
+            additional_changes_prompt = self._i18n.slice(
+                "human_feedback_classification"
+            ).format(feedback=human_feedback)
+
+            retry_count = 0
+            llm_call_successful = False
+            additional_changes_response = None
+
+            while retry_count < MAX_LLM_RETRY and not llm_call_successful:
+                try:
+                    additional_changes_response = (
+                        self.llm.call(
+                            [
+                                self._format_msg(
+                                    additional_changes_prompt, role="system"
+                                )
+                            ],
+                            callbacks=self.callbacks,
+                        )
+                        .strip()
+                        .lower()
+                    )
+                    llm_call_successful = True
+                except Exception as e:
+                    retry_count += 1
+
+                    self._printer.print(
+                        content=f"Error during LLM call to classify human feedback: {e}. Retrying... ({retry_count}/{MAX_LLM_RETRY})",
+                        color="red",
+                    )
+
+            if not llm_call_successful:
+                self._printer.print(
+                    content="Error processing feedback after multiple attempts.",
+                    color="red",
+                )
+                self.ask_for_human_input = False
+                break
+
+            if additional_changes_response == "false":
+                self.ask_for_human_input = False
+            elif additional_changes_response == "true":
+                self.ask_for_human_input = True
+                # Add human feedback to messages
+                self.messages.append(self._format_msg(f"Feedback: {human_feedback}"))
+                # Invoke the loop again with updated messages
+                formatted_answer = self._invoke_loop()
+
+                if self.crew and self.crew._train:
+                    self._handle_crew_training_output(formatted_answer)
+            else:
+                # Unexpected response
+                self._printer.print(
+                    content=f"Unexpected response from LLM: '{additional_changes_response}'. Assuming no additional changes requested.",
+                    color="red",
+                )
+                self.ask_for_human_input = False
+
+        return formatted_answer