Brandon/cre 509 hitl multiple rounds of followup (#1702)

* v1 of HITL working

* Drop print statements

* HITL code more robust. Still needs to be refactored.

* refactor and more clear messages

* Fix type issue

* fix tests

* Fix test again

* Drop extra print
This commit is contained in:
Brandon Hancock (bhancock_ai)
2024-12-05 10:14:04 -05:00
committed by GitHub
parent 06d02c0f62
commit 03abf53ba9
6 changed files with 403 additions and 89 deletions

View File

@@ -3,16 +3,15 @@ from typing import TYPE_CHECKING, Optional
from crewai.memory.entity.entity_memory_item import EntityMemoryItem
from crewai.memory.long_term.long_term_memory_item import LongTermMemoryItem
from crewai.utilities import I18N
from crewai.utilities.converter import ConverterError
from crewai.utilities.evaluators.task_evaluator import TaskEvaluator
from crewai.utilities import I18N
from crewai.utilities.printer import Printer
if TYPE_CHECKING:
from crewai.agents.agent_builder.base_agent import BaseAgent
from crewai.crew import Crew
from crewai.task import Task
from crewai.agents.agent_builder.base_agent import BaseAgent
class CrewAgentExecutorMixin:
@@ -100,14 +99,19 @@ class CrewAgentExecutorMixin:
print(f"Failed to add to long term memory: {e}")
pass
def _ask_human_input(self, final_answer: dict) -> str:
def _ask_human_input(self, final_answer: str) -> str:
"""Prompt human input for final decision making."""
self._printer.print(
content=f"\033[1m\033[95m ## Final Result:\033[00m \033[92m{final_answer}\033[00m"
)
self._printer.print(
content="\n\n=====\n## Please provide feedback on the Final Result and the Agent's actions:",
content=(
"\n\n=====\n"
"## Please provide feedback on the Final Result and the Agent's actions. "
"Respond with 'looks good' or a similar phrase when you're satisfied.\n"
"=====\n"
),
color="bold_yellow",
)
return input()

View File

@@ -16,7 +16,7 @@ from crewai.agents.tools_handler import ToolsHandler
from crewai.tools.base_tool import BaseTool
from crewai.tools.tool_usage import ToolUsage, ToolUsageErrorException
from crewai.utilities import I18N, Printer
from crewai.utilities.constants import TRAINING_DATA_FILE
from crewai.utilities.constants import MAX_LLM_RETRY, TRAINING_DATA_FILE
from crewai.utilities.exceptions.context_window_exceeding_exception import (
LLMContextLengthExceededException,
)
@@ -90,7 +90,6 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
if "system" in self.prompt:
system_prompt = self._format_prompt(self.prompt.get("system", ""), inputs)
user_prompt = self._format_prompt(self.prompt.get("user", ""), inputs)
self.messages.append(self._format_msg(system_prompt, role="system"))
self.messages.append(self._format_msg(user_prompt))
else:
@@ -103,17 +102,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
formatted_answer = self._invoke_loop()
if self.ask_for_human_input:
human_feedback = self._ask_human_input(formatted_answer.output)
if self.crew and self.crew._train:
self._handle_crew_training_output(formatted_answer, human_feedback)
formatted_answer = self._handle_human_feedback(formatted_answer)
# Making sure we only ask for it once, so disabling for the next thought loop
self.ask_for_human_input = False
self.messages.append(self._format_msg(f"Feedback: {human_feedback}"))
formatted_answer = self._invoke_loop()
if self.crew and self.crew._train:
self._handle_crew_training_output(formatted_answer)
self._create_short_term_memory(formatted_answer)
self._create_long_term_memory(formatted_answer)
return {"output": formatted_answer.output}
@@ -326,16 +316,14 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
def _handle_context_length(self) -> None:
if self.respect_context_window:
self._logger.log(
"debug",
"Context length exceeded. Summarizing content to fit the model context window.",
self._printer.print(
content="Context length exceeded. Summarizing content to fit the model context window.",
color="yellow",
)
self._summarize_messages()
else:
self._logger.log(
"debug",
"Context length exceeded. Consider using smaller text or RAG tools from crewai_tools.",
self._printer.print(
content="Context length exceeded. Consider using smaller text or RAG tools from crewai_tools.",
color="red",
)
raise SystemExit(
@@ -362,15 +350,13 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
] = result.output
training_handler.save(training_data)
else:
self._logger.log(
"error",
"Invalid train iteration type or agent_id not in training data.",
self._printer.print(
content="Invalid train iteration type or agent_id not in training data.",
color="red",
)
else:
self._logger.log(
"error",
"Crew is None or does not have _train_iteration attribute.",
self._printer.print(
content="Crew is None or does not have _train_iteration attribute.",
color="red",
)
@@ -388,15 +374,13 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
train_iteration, agent_id, training_data
)
else:
self._logger.log(
"error",
"Invalid train iteration type. Expected int.",
self._printer.print(
content="Invalid train iteration type. Expected int.",
color="red",
)
else:
self._logger.log(
"error",
"Crew is None or does not have _train_iteration attribute.",
self._printer.print(
content="Crew is None or does not have _train_iteration attribute.",
color="red",
)
@@ -412,3 +396,82 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
def _format_msg(self, prompt: str, role: str = "user") -> Dict[str, str]:
prompt = prompt.rstrip()
return {"role": role, "content": prompt}
def _handle_human_feedback(self, formatted_answer: AgentFinish) -> AgentFinish:
"""
Handles the human feedback loop, allowing the user to provide feedback
on the agent's output and determining if additional iterations are needed.
Parameters:
formatted_answer (AgentFinish): The initial output from the agent.
Returns:
AgentFinish: The final output after incorporating human feedback.
"""
while self.ask_for_human_input:
human_feedback = self._ask_human_input(formatted_answer.output)
print("Human feedback: ", human_feedback)
if self.crew and self.crew._train:
self._handle_crew_training_output(formatted_answer, human_feedback)
# Make an LLM call to verify if additional changes are requested based on human feedback
additional_changes_prompt = self._i18n.slice(
"human_feedback_classification"
).format(feedback=human_feedback)
retry_count = 0
llm_call_successful = False
additional_changes_response = None
while retry_count < MAX_LLM_RETRY and not llm_call_successful:
try:
additional_changes_response = (
self.llm.call(
[
self._format_msg(
additional_changes_prompt, role="system"
)
],
callbacks=self.callbacks,
)
.strip()
.lower()
)
llm_call_successful = True
except Exception as e:
retry_count += 1
self._printer.print(
content=f"Error during LLM call to classify human feedback: {e}. Retrying... ({retry_count}/{MAX_LLM_RETRY})",
color="red",
)
if not llm_call_successful:
self._printer.print(
content="Error processing feedback after multiple attempts.",
color="red",
)
self.ask_for_human_input = False
break
if additional_changes_response == "false":
self.ask_for_human_input = False
elif additional_changes_response == "true":
self.ask_for_human_input = True
# Add human feedback to messages
self.messages.append(self._format_msg(f"Feedback: {human_feedback}"))
# Invoke the loop again with updated messages
formatted_answer = self._invoke_loop()
if self.crew and self.crew._train:
self._handle_crew_training_output(formatted_answer)
else:
# Unexpected response
self._printer.print(
content=f"Unexpected response from LLM: '{additional_changes_response}'. Assuming no additional changes requested.",
color="red",
)
self.ask_for_human_input = False
return formatted_answer