Brandon/cre 509 hitl multiple rounds of followup (#1702)

* v1 of HITL working
* Drop print statements
* Make HITL code more robust (still needs to be refactored)
* Refactor and make messages clearer
* Fix type issue
* Fix tests
* Fix test again
* Drop extra print
2026-01-10 00:28:31 +00:00 · 2024-12-05 10:14:04 -05:00
parent 06d02c0f62
commit 03abf53ba9
6 changed files with 403 additions and 89 deletions
--- a/src/crewai/agents/agent_builder/base_agent_executor_mixin.py
+++ b/src/crewai/agents/agent_builder/base_agent_executor_mixin.py
@@ -3,16 +3,15 @@ from typing import TYPE_CHECKING, Optional

 from crewai.memory.entity.entity_memory_item import EntityMemoryItem
 from crewai.memory.long_term.long_term_memory_item import LongTermMemoryItem
+from crewai.utilities import I18N
 from crewai.utilities.converter import ConverterError
 from crewai.utilities.evaluators.task_evaluator import TaskEvaluator
-from crewai.utilities import I18N
 from crewai.utilities.printer import Printer

-
 if TYPE_CHECKING:
+    from crewai.agents.agent_builder.base_agent import BaseAgent
    from crewai.crew import Crew
    from crewai.task import Task
-    from crewai.agents.agent_builder.base_agent import BaseAgent


 class CrewAgentExecutorMixin:
@@ -100,14 +99,19 @@ class CrewAgentExecutorMixin:
                print(f"Failed to add to long term memory: {e}")
                pass

-    def _ask_human_input(self, final_answer: dict) -> str:
+    def _ask_human_input(self, final_answer: str) -> str:
        """Prompt human input for final decision making."""
        self._printer.print(
            content=f"\033[1m\033[95m ## Final Result:\033[00m \033[92m{final_answer}\033[00m"
        )

        self._printer.print(
-            content="\n\n=====\n## Please provide feedback on the Final Result and the Agent's actions:",
+            content=(
+                "\n\n=====\n"
+                "## Please provide feedback on the Final Result and the Agent's actions. "
+                "Respond with 'looks good' or a similar phrase when you're satisfied.\n"
+                "=====\n"
+            ),
            color="bold_yellow",
        )
        return input()
--- a/src/crewai/agents/crew_agent_executor.py
+++ b/src/crewai/agents/crew_agent_executor.py
@@ -16,7 +16,7 @@ from crewai.agents.tools_handler import ToolsHandler
 from crewai.tools.base_tool import BaseTool
 from crewai.tools.tool_usage import ToolUsage, ToolUsageErrorException
 from crewai.utilities import I18N, Printer
-from crewai.utilities.constants import TRAINING_DATA_FILE
+from crewai.utilities.constants import MAX_LLM_RETRY, TRAINING_DATA_FILE
 from crewai.utilities.exceptions.context_window_exceeding_exception import (
    LLMContextLengthExceededException,
 )
@@ -90,7 +90,6 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
        if "system" in self.prompt:
            system_prompt = self._format_prompt(self.prompt.get("system", ""), inputs)
            user_prompt = self._format_prompt(self.prompt.get("user", ""), inputs)
-
            self.messages.append(self._format_msg(system_prompt, role="system"))
            self.messages.append(self._format_msg(user_prompt))
        else:
@@ -103,17 +102,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
        formatted_answer = self._invoke_loop()

        if self.ask_for_human_input:
-            human_feedback = self._ask_human_input(formatted_answer.output)
-            if self.crew and self.crew._train:
-                self._handle_crew_training_output(formatted_answer, human_feedback)
+            formatted_answer = self._handle_human_feedback(formatted_answer)

-            # Making sure we only ask for it once, so disabling for the next thought loop
-            self.ask_for_human_input = False
-            self.messages.append(self._format_msg(f"Feedback: {human_feedback}"))
-            formatted_answer = self._invoke_loop()
-
-            if self.crew and self.crew._train:
-                self._handle_crew_training_output(formatted_answer)
        self._create_short_term_memory(formatted_answer)
        self._create_long_term_memory(formatted_answer)
        return {"output": formatted_answer.output}
@@ -326,16 +316,14 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):

    def _handle_context_length(self) -> None:
        if self.respect_context_window:
-            self._logger.log(
-                "debug",
-                "Context length exceeded. Summarizing content to fit the model context window.",
+            self._printer.print(
+                content="Context length exceeded. Summarizing content to fit the model context window.",
                color="yellow",
            )
            self._summarize_messages()
        else:
-            self._logger.log(
-                "debug",
-                "Context length exceeded. Consider using smaller text or RAG tools from crewai_tools.",
+            self._printer.print(
+                content="Context length exceeded. Consider using smaller text or RAG tools from crewai_tools.",
                color="red",
            )
            raise SystemExit(
@@ -362,15 +350,13 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                    ] = result.output
                    training_handler.save(training_data)
                else:
-                    self._logger.log(
-                        "error",
-                        "Invalid train iteration type or agent_id not in training data.",
+                    self._printer.print(
+                        content="Invalid train iteration type or agent_id not in training data.",
                        color="red",
                    )
            else:
-                self._logger.log(
-                    "error",
-                    "Crew is None or does not have _train_iteration attribute.",
+                self._printer.print(
+                    content="Crew is None or does not have _train_iteration attribute.",
                    color="red",
                )

@@ -388,15 +374,13 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                        train_iteration, agent_id, training_data
                    )
                else:
-                    self._logger.log(
-                        "error",
-                        "Invalid train iteration type. Expected int.",
+                    self._printer.print(
+                        content="Invalid train iteration type. Expected int.",
                        color="red",
                    )
            else:
-                self._logger.log(
-                    "error",
-                    "Crew is None or does not have _train_iteration attribute.",
+                self._printer.print(
+                    content="Crew is None or does not have _train_iteration attribute.",
                    color="red",
                )

@@ -412,3 +396,82 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
    def _format_msg(self, prompt: str, role: str = "user") -> Dict[str, str]:
        prompt = prompt.rstrip()
        return {"role": role, "content": prompt}
+
+    def _handle_human_feedback(self, formatted_answer: AgentFinish) -> AgentFinish:
+        """
+        Handles the human feedback loop, allowing the user to provide feedback
+        on the agent's output and determining if additional iterations are needed.
+
+        Parameters:
+            formatted_answer (AgentFinish): The initial output from the agent.
+
+        Returns:
+            AgentFinish: The final output after incorporating human feedback.
+        """
+        while self.ask_for_human_input:
+            human_feedback = self._ask_human_input(formatted_answer.output)
+            print("Human feedback: ", human_feedback)
+
+            if self.crew and self.crew._train:
+                self._handle_crew_training_output(formatted_answer, human_feedback)
+
+            # Make an LLM call to verify if additional changes are requested based on human feedback
+            additional_changes_prompt = self._i18n.slice(
+                "human_feedback_classification"
+            ).format(feedback=human_feedback)
+
+            retry_count = 0
+            llm_call_successful = False
+            additional_changes_response = None
+
+            while retry_count < MAX_LLM_RETRY and not llm_call_successful:
+                try:
+                    additional_changes_response = (
+                        self.llm.call(
+                            [
+                                self._format_msg(
+                                    additional_changes_prompt, role="system"
+                                )
+                            ],
+                            callbacks=self.callbacks,
+                        )
+                        .strip()
+                        .lower()
+                    )
+                    llm_call_successful = True
+                except Exception as e:
+                    retry_count += 1
+
+                    self._printer.print(
+                        content=f"Error during LLM call to classify human feedback: {e}. Retrying... ({retry_count}/{MAX_LLM_RETRY})",
+                        color="red",
+                    )
+
+            if not llm_call_successful:
+                self._printer.print(
+                    content="Error processing feedback after multiple attempts.",
+                    color="red",
+                )
+                self.ask_for_human_input = False
+                break
+
+            if additional_changes_response == "false":
+                self.ask_for_human_input = False
+            elif additional_changes_response == "true":
+                self.ask_for_human_input = True
+                # Add human feedback to messages
+                self.messages.append(self._format_msg(f"Feedback: {human_feedback}"))
+                # Invoke the loop again with updated messages
+                formatted_answer = self._invoke_loop()
+
+                if self.crew and self.crew._train:
+                    self._handle_crew_training_output(formatted_answer)
+            else:
+                # Unexpected response
+                self._printer.print(
+                    content=f"Unexpected response from LLM: '{additional_changes_response}'. Assuming no additional changes requested.",
+                    color="red",
+                )
+                self.ask_for_human_input = False
+
+        return formatted_answer
--- a/src/crewai/translations/en.json
+++ b/src/crewai/translations/en.json
@@ -22,7 +22,8 @@
    "sumamrize_instruction": "Summarize the following text, make sure to include all the important information: {group}",
    "summary": "This is a summary of our conversation so far:\n{merged_summary}",
    "manager_request": "Your best answer to your coworker asking you this, accounting for the context shared.",
-    "formatted_task_instructions": "Ensure your final answer contains only the content in the following format: {output_format}\n\nEnsure the final output does not include any code block markers like ```json or ```python."
+    "formatted_task_instructions": "Ensure your final answer contains only the content in the following format: {output_format}\n\nEnsure the final output does not include any code block markers like ```json or ```python.",
+    "human_feedback_classification": "Determine if the following feedback indicates that the user is satisfied or if further changes are needed. Respond with 'True' if further changes are needed, or 'False' if the user is satisfied. **Important** Do not include any additional commentary outside of your 'True' or 'False' response.\n\nFeedback: \"{feedback}\""
  },
  "errors": {
    "force_final_answer_error": "You can't keep going, this was the best you could do.\n {formatted_answer.text}",
--- a/src/crewai/utilities/constants.py
+++ b/src/crewai/utilities/constants.py
@@ -2,3 +2,4 @@ TRAINING_DATA_FILE = "training_data.pkl"
 TRAINED_AGENTS_DATA_FILE = "trained_agents_data.pkl"
 DEFAULT_SCORE_THRESHOLD = 0.35
 KNOWLEDGE_DIRECTORY = "knowledge"
+MAX_LLM_RETRY = 3