Mirror of https://github.com/crewAIInc/crewAI.git (synced 2025-12-25 00:38:30 +00:00)

Compare commits

40 Commits

bugfix/llm...fix/clone_
| SHA1 |
| :--- |
| 2816e97753 |
| c4da244b9a |
| 6617db78f8 |
| fd89c3b896 |
| b183aaf51d |
| 8570461969 |
| b92253bb13 |
| 42769e8b22 |
| ac28f7f4bc |
| 9b88bcd97e |
| 1de204eff8 |
| f4b7cffb6b |
| 1cc9c981e4 |
| adec0892fa |
| cb3865a042 |
| d506bdb749 |
| 319128c90d |
| 6fb654cccd |
| 0675a2fe04 |
| d438f5a7d4 |
| 4ff9d4963c |
| 079692de35 |
| 65b6ff1cc7 |
| 4008ba74f8 |
| 24dbdd5686 |
| e4b97e328e |
| 27e49300f6 |
| b87c908434 |
| c6d8c75869 |
| 849908c7ea |
| ab8d56de4f |
| 79aaab99c4 |
| 65d3837c0d |
| e3e62c16d5 |
| f34f53fae2 |
| 71246e9de1 |
| 591c4a511b |
| c67f75d848 |
| dc9d1d6b49 |
| f3004ffb2b |
@@ -33,12 +33,11 @@ crew = Crew(
| :------------------------------- | :---------------- | :---------------------------- | :--------------------------------------------------------------------------------------------------- |
| **Description** | `description` | `str` | A clear, concise statement of what the task entails. |
| **Expected Output** | `expected_output` | `str` | A detailed description of what the task's completion looks like. |
| **Name** _(optional)_ | `name` | `Optional[str]` | A name identifier for the task. |
| **Agent** _(optional)_ | `agent` | `Optional[BaseAgent]` | The agent responsible for executing the task. |
| **Tools** _(optional)_ | `tools` | `List[BaseTool]` | The tools/resources the agent is limited to use for this task. |
| **Name** _(optional)_ | `name` | `Optional[str]` | A name identifier for the task. |
| **Agent** _(optional)_ | `agent` | `Optional[BaseAgent]` | The agent responsible for executing the task. |
| **Tools** _(optional)_ | `tools` | `List[BaseTool]` | The tools/resources the agent is limited to use for this task. |
| **Context** _(optional)_ | `context` | `Optional[List["Task"]]` | Other tasks whose outputs will be used as context for this task. |
| **Async Execution** _(optional)_ | `async_execution` | `Optional[bool]` | Whether the task should be executed asynchronously. Defaults to False. |
| **Human Input** _(optional)_ | `human_input` | `Optional[bool]` | Whether the task should have a human review the final answer of the agent. Defaults to False. |
| **Config** _(optional)_ | `config` | `Optional[Dict[str, Any]]` | Task-specific configuration parameters. |
| **Output File** _(optional)_ | `output_file` | `Optional[str]` | File path for storing the task output. |
| **Output JSON** _(optional)_ | `output_json` | `Optional[Type[BaseModel]]` | A Pydantic model to structure the JSON output. |
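The attribute table above maps directly onto keyword arguments of `Task`. A minimal sketch, assuming the agent already exists; the names, file path, and output model below are purely illustrative:

```python
from crewai import Agent, Task
from pydantic import BaseModel


class Summary(BaseModel):
    # Hypothetical output schema, used only for this illustration.
    title: str
    bullet_points: list[str]


researcher = Agent(
    role="Researcher",
    goal="Summarize recent AI news",
    backstory="Tracks AI developments daily.",
)

summary_task = Task(
    name="daily_ai_summary",                    # optional identifier
    description="Collect and summarize today's AI news.",
    expected_output="A short summary with 3-5 bullet points.",
    agent=researcher,                           # agent responsible for execution
    async_execution=False,                      # defaults to False
    human_input=False,                          # defaults to False
    output_file="output/ai_summary.md",         # persist the result to disk
    output_json=Summary,                        # structure the JSON output
)
```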
@@ -95,29 +95,18 @@ class CrewAgentExecutorMixin:
pass

def _ask_human_input(self, final_answer: str) -> str:
"""Prompt human input with mode-appropriate messaging."""
"""Prompt human input for final decision making."""
self._printer.print(
content=f"\033[1m\033[95m ## Final Result:\033[00m \033[92m{final_answer}\033[00m"
)

# Training mode prompt (single iteration)
if self.crew and getattr(self.crew, "_train", False):
prompt = (
self._printer.print(
content=(
"\n\n=====\n"
"## TRAINING MODE: Provide feedback to improve the agent's performance.\n"
"This will be used to train better versions of the agent.\n"
"Please provide detailed feedback about the result quality and reasoning process.\n"
"## Please provide feedback on the Final Result and the Agent's actions. "
"Respond with 'looks good' or a similar phrase when you're satisfied.\n"
"=====\n"
)
# Regular human-in-the-loop prompt (multiple iterations)
else:
prompt = (
"\n\n=====\n"
"## HUMAN FEEDBACK: Provide feedback on the Final Result and Agent's actions.\n"
"Respond with 'looks good' to accept or provide specific improvement requests.\n"
"You can provide multiple rounds of feedback until satisfied.\n"
"=====\n"
)

self._printer.print(content=prompt, color="bold_yellow")
),
color="bold_yellow",
)
return input()
@@ -100,12 +100,6 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):

try:
formatted_answer = self._invoke_loop()
except AssertionError:
self._printer.print(
content="Agent failed to reach a final answer. This is likely a bug - please report it.",
color="red",
)
raise
except Exception as e:
if e.__class__.__module__.startswith("litellm"):
# Do not retry on litellm errors

@@ -121,7 +115,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
self._create_long_term_memory(formatted_answer)
return {"output": formatted_answer.output}

def _invoke_loop(self) -> AgentFinish:
def _invoke_loop(self):
"""
Main loop to invoke the agent's thought process until it reaches a conclusion
or the maximum number of iterations is reached.

@@ -167,11 +161,6 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
finally:
self.iterations += 1

# During the invoke loop, formatted_answer alternates between AgentAction
# (when the agent is using tools) and eventually becomes AgentFinish
# (when the agent reaches a final answer). This assertion confirms we've
# reached a final answer and helps type checking understand this transition.
assert isinstance(formatted_answer, AgentFinish)
self._show_logs(formatted_answer)
return formatted_answer

@@ -303,11 +292,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
self._printer.print(
content=f"\033[1m\033[95m# Agent:\033[00m \033[1m\033[92m{agent_role}\033[00m"
)
description = (
getattr(self.task, "description") if self.task else "Not Found"
)
self._printer.print(
content=f"\033[95m## Task:\033[00m \033[92m{description}\033[00m"
content=f"\033[95m## Task:\033[00m \033[92m{self.task.description}\033[00m"
)

def _show_logs(self, formatted_answer: Union[AgentAction, AgentFinish]):
@@ -432,50 +418,58 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
)

def _handle_crew_training_output(
self, result: AgentFinish, human_feedback: Optional[str] = None
self, result: AgentFinish, human_feedback: str | None = None
) -> None:
"""Handle the process of saving training data."""
"""Function to handle the process of the training data."""
agent_id = str(self.agent.id) # type: ignore
train_iteration = (
getattr(self.crew, "_train_iteration", None) if self.crew else None
)

if train_iteration is None or not isinstance(train_iteration, int):
self._printer.print(
content="Invalid or missing train iteration. Cannot save training data.",
color="red",
)
return

# Load training data
training_handler = CrewTrainingHandler(TRAINING_DATA_FILE)
training_data = training_handler.load() or {}
training_data = training_handler.load()

# Initialize or retrieve agent's training data
agent_training_data = training_data.get(agent_id, {})

if human_feedback is not None:
# Save initial output and human feedback
agent_training_data[train_iteration] = {
"initial_output": result.output,
"human_feedback": human_feedback,
}
else:
# Save improved output
if train_iteration in agent_training_data:
agent_training_data[train_iteration]["improved_output"] = result.output
# Check if training data exists, human input is not requested, and self.crew is valid
if training_data and not self.ask_for_human_input:
if self.crew is not None and hasattr(self.crew, "_train_iteration"):
train_iteration = self.crew._train_iteration
if agent_id in training_data and isinstance(train_iteration, int):
training_data[agent_id][train_iteration][
"improved_output"
] = result.output
training_handler.save(training_data)
else:
self._printer.print(
content="Invalid train iteration type or agent_id not in training data.",
color="red",
)
else:
self._printer.print(
content=(
f"No existing training data for agent {agent_id} and iteration "
f"{train_iteration}. Cannot save improved output."
),
content="Crew is None or does not have _train_iteration attribute.",
color="red",
)
return

# Update the training data and save
training_data[agent_id] = agent_training_data
training_handler.save(training_data)
if self.ask_for_human_input and human_feedback is not None:
training_data = {
"initial_output": result.output,
"human_feedback": human_feedback,
"agent": agent_id,
"agent_role": self.agent.role, # type: ignore
}
if self.crew is not None and hasattr(self.crew, "_train_iteration"):
train_iteration = self.crew._train_iteration
if isinstance(train_iteration, int):
CrewTrainingHandler(TRAINING_DATA_FILE).append(
train_iteration, agent_id, training_data
)
else:
self._printer.print(
content="Invalid train iteration type. Expected int.",
color="red",
)
else:
self._printer.print(
content="Crew is None or does not have _train_iteration attribute.",
color="red",
)

def _format_prompt(self, prompt: str, inputs: Dict[str, str]) -> str:
prompt = prompt.replace("{input}", inputs["input"])
@@ -491,103 +485,82 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
return {"role": role, "content": prompt}

def _handle_human_feedback(self, formatted_answer: AgentFinish) -> AgentFinish:
"""Handle human feedback with different flows for training vs regular use.
"""
Handles the human feedback loop, allowing the user to provide feedback
on the agent's output and determining if additional iterations are needed.

Args:
formatted_answer: The initial AgentFinish result to get feedback on
Parameters:
formatted_answer (AgentFinish): The initial output from the agent.

Returns:
AgentFinish: The final answer after processing feedback
AgentFinish: The final output after incorporating human feedback.
"""
human_feedback = self._ask_human_input(formatted_answer.output)

if self._is_training_mode():
return self._handle_training_feedback(formatted_answer, human_feedback)

return self._handle_regular_feedback(formatted_answer, human_feedback)

def _is_training_mode(self) -> bool:
"""Check if crew is in training mode."""
return bool(self.crew and self.crew._train)

def _handle_training_feedback(
self, initial_answer: AgentFinish, feedback: str
) -> AgentFinish:
"""Process feedback for training scenarios with single iteration."""
self._printer.print(
content="\nProcessing training feedback.\n",
color="yellow",
)
self._handle_crew_training_output(initial_answer, feedback)
self.messages.append(self._format_msg(f"Feedback: {feedback}"))
improved_answer = self._invoke_loop()
self._handle_crew_training_output(improved_answer)
self.ask_for_human_input = False
return improved_answer

def _handle_regular_feedback(
self, current_answer: AgentFinish, initial_feedback: str
) -> AgentFinish:
"""Process feedback for regular use with potential multiple iterations."""
feedback = initial_feedback
answer = current_answer

while self.ask_for_human_input:
response = self._get_llm_feedback_response(feedback)
human_feedback = self._ask_human_input(formatted_answer.output)

if not self._feedback_requires_changes(response):
if self.crew and self.crew._train:
self._handle_crew_training_output(formatted_answer, human_feedback)

# Make an LLM call to verify if additional changes are requested based on human feedback
additional_changes_prompt = self._i18n.slice(
"human_feedback_classification"
).format(feedback=human_feedback)

retry_count = 0
llm_call_successful = False
additional_changes_response = None

while retry_count < MAX_LLM_RETRY and not llm_call_successful:
try:
additional_changes_response = (
self.llm.call(
[
self._format_msg(
additional_changes_prompt, role="system"
)
],
callbacks=self.callbacks,
)
.strip()
.lower()
)
llm_call_successful = True
except Exception as e:
retry_count += 1

self._printer.print(
content=f"Error during LLM call to classify human feedback: {e}. Retrying... ({retry_count}/{MAX_LLM_RETRY})",
color="red",
)

if not llm_call_successful:
self._printer.print(
content="Error processing feedback after multiple attempts.",
color="red",
)
self.ask_for_human_input = False
break

if additional_changes_response == "false":
self.ask_for_human_input = False
elif additional_changes_response == "true":
self.ask_for_human_input = True
# Add human feedback to messages
self.messages.append(self._format_msg(f"Feedback: {human_feedback}"))
# Invoke the loop again with updated messages
formatted_answer = self._invoke_loop()

if self.crew and self.crew._train:
self._handle_crew_training_output(formatted_answer)
else:
answer = self._process_feedback_iteration(feedback)
feedback = self._ask_human_input(answer.output)
# Unexpected response
self._printer.print(
content=f"Unexpected response from LLM: '{additional_changes_response}'. Assuming no additional changes requested.",
color="red",
)
self.ask_for_human_input = False

return answer

def _get_llm_feedback_response(self, feedback: str) -> Optional[str]:
"""Get LLM classification of whether feedback requires changes."""
prompt = self._i18n.slice("human_feedback_classification").format(
feedback=feedback
)
message = self._format_msg(prompt, role="system")

for retry in range(MAX_LLM_RETRY):
try:
response = self.llm.call([message], callbacks=self.callbacks)
return response.strip().lower() if response else None
except Exception as error:
self._log_feedback_error(retry, error)

self._log_max_retries_exceeded()
return None

def _feedback_requires_changes(self, response: Optional[str]) -> bool:
"""Determine if feedback response indicates need for changes."""
return response == "true" if response else False

def _process_feedback_iteration(self, feedback: str) -> AgentFinish:
"""Process a single feedback iteration."""
self.messages.append(self._format_msg(f"Feedback: {feedback}"))
return self._invoke_loop()

def _log_feedback_error(self, retry_count: int, error: Exception) -> None:
"""Log feedback processing errors."""
self._printer.print(
content=(
f"Error processing feedback: {error}. "
f"Retrying... ({retry_count + 1}/{MAX_LLM_RETRY})"
),
color="red",
)

def _log_max_retries_exceeded(self) -> None:
"""Log when max retries for feedback processing are exceeded."""
self._printer.print(
content=(
f"Failed to process feedback after {MAX_LLM_RETRY} attempts. "
"Ending feedback loop."
),
color="red",
)
return formatted_answer

def _handle_max_iterations_exceeded(self, formatted_answer):
"""
@@ -494,26 +494,21 @@ class Crew(BaseModel):
train_crew = self.copy()
train_crew._setup_for_training(filename)

try:
for n_iteration in range(n_iterations):
train_crew._train_iteration = n_iteration
train_crew.kickoff(inputs=inputs)
for n_iteration in range(n_iterations):
train_crew._train_iteration = n_iteration
train_crew.kickoff(inputs=inputs)

training_data = CrewTrainingHandler(TRAINING_DATA_FILE).load()
training_data = CrewTrainingHandler(TRAINING_DATA_FILE).load()

for agent in train_crew.agents:
if training_data.get(str(agent.id)):
result = TaskEvaluator(agent).evaluate_training_data(
training_data=training_data, agent_id=str(agent.id)
)
CrewTrainingHandler(filename).save_trained_data(
agent_id=str(agent.role), trained_data=result.model_dump()
)
except Exception as e:
self._logger.log("error", f"Training failed: {e}", color="red")
CrewTrainingHandler(TRAINING_DATA_FILE).clear()
CrewTrainingHandler(filename).clear()
raise
for agent in train_crew.agents:
if training_data.get(str(agent.id)):
result = TaskEvaluator(agent).evaluate_training_data(
training_data=training_data, agent_id=str(agent.id)
)

CrewTrainingHandler(filename).save_trained_data(
agent_id=str(agent.role), trained_data=result.model_dump()
)

def kickoff(
self,
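The hunk above covers `Crew.train`: each iteration sets `_train_iteration`, runs `kickoff`, and the collected feedback is then evaluated and saved per agent role. A minimal sketch of how this flow is typically driven from user code, assuming `my_crew` is an already-configured `Crew`; the filename and inputs are placeholders:

```python
# Assumes `my_crew` is an existing Crew instance.
my_crew.train(
    n_iterations=2,                      # each iteration runs kickoff() and collects human feedback
    filename="trained_agents_data.pkl",  # where the evaluated, per-role training data is saved
    inputs={"topic": "AI agents"},       # forwarded to kickoff(inputs=...)
)
```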
@@ -133,7 +133,6 @@ class LLM:
logprobs: Optional[int] = None,
top_logprobs: Optional[int] = None,
base_url: Optional[str] = None,
api_base: Optional[str] = None,
api_version: Optional[str] = None,
api_key: Optional[str] = None,
callbacks: List[Any] = [],

@@ -153,7 +152,6 @@ class LLM:
self.logprobs = logprobs
self.top_logprobs = top_logprobs
self.base_url = base_url
self.api_base = api_base
self.api_version = api_version
self.api_key = api_key
self.callbacks = callbacks

@@ -234,8 +232,7 @@ class LLM:
"seed": self.seed,
"logprobs": self.logprobs,
"top_logprobs": self.top_logprobs,
"api_base": self.api_base,
"base_url": self.base_url,
"api_base": self.base_url,
"api_version": self.api_version,
"api_key": self.api_key,
"stream": False,
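The constructor and call-parameter hunks above show the two sides of this compare handling `base_url` and `api_base` either as separate fields or collapsed into one value. A minimal sketch of pointing `LLM` at a custom endpoint, assuming a local OpenAI-compatible server; the URL, model name, and key are placeholders:

```python
from crewai import LLM

# Placeholder endpoint and key; only parameters visible in the hunks are used.
llm = LLM(
    model="openai/gpt-4o-mini",
    base_url="http://localhost:8000/v1",  # passed through to the completion call as the API base
    api_key="sk-placeholder",
)
```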
@@ -431,9 +431,7 @@ class Task(BaseModel):
content = (
json_output
if json_output
else pydantic_output.model_dump_json()
if pydantic_output
else result
else pydantic_output.model_dump_json() if pydantic_output else result
)
self._save_file(content)

@@ -454,7 +452,7 @@ class Task(BaseModel):
return "\n".join(tasks_slices)

def interpolate_inputs_and_add_conversation_history(
self, inputs: Dict[str, Union[str, int, float, Dict[str, Any], List[Any]]]
self, inputs: Dict[str, Union[str, int, float]]
) -> None:
"""Interpolate inputs into the task description, expected output, and output file path.
Add conversation history if present.

@@ -526,9 +524,7 @@ class Task(BaseModel):
)

def interpolate_only(
self,
input_string: Optional[str],
inputs: Dict[str, Union[str, int, float, Dict[str, Any], List[Any]]],
self, input_string: Optional[str], inputs: Dict[str, Union[str, int, float]]
) -> str:
"""Interpolate placeholders (e.g., {key}) in a string while leaving JSON untouched.

@@ -536,39 +532,17 @@ class Task(BaseModel):
input_string: The string containing template variables to interpolate.
Can be None or empty, in which case an empty string is returned.
inputs: Dictionary mapping template variables to their values.
Supported value types are strings, integers, floats, and dicts/lists
containing only these types and other nested dicts/lists.
Supported value types are strings, integers, and floats.
If input_string is empty or has no placeholders, inputs can be empty.

Returns:
The interpolated string with all template variables replaced with their values.
Empty string if input_string is None or empty.

Raises:
ValueError: If a value contains unsupported types
ValueError: If a required template variable is missing from inputs.
KeyError: If a template variable is not found in the inputs dictionary.
"""

# Validation function for recursive type checking
def validate_type(value: Any) -> None:
if value is None:
return
if isinstance(value, (str, int, float, bool)):
return
if isinstance(value, (dict, list)):
for item in value.values() if isinstance(value, dict) else value:
validate_type(item)
return
raise ValueError(
f"Unsupported type {type(value).__name__} in inputs. "
"Only str, int, float, bool, dict, and list are allowed."
)

# Validate all input values
for key, value in inputs.items():
try:
validate_type(value)
except ValueError as e:
raise ValueError(f"Invalid value for key '{key}': {str(e)}") from e

if input_string is None or not input_string:
return ""
if "{" not in input_string and "}" not in input_string:

@@ -577,7 +551,15 @@ class Task(BaseModel):
raise ValueError(
"Inputs dictionary cannot be empty when interpolating variables"
)

try:
# Validate input types
for key, value in inputs.items():
if not isinstance(value, (str, int, float)):
raise ValueError(
f"Value for key '{key}' must be a string, integer, or float, got {type(value).__name__}"
)

escaped_string = input_string.replace("{", "{{").replace("}", "}}")

for key in inputs.keys():
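Both sides of the hunks above agree on the basic contract of `interpolate_only`: `{key}` placeholders are replaced from `inputs` while literal JSON braces are preserved; the wider signature additionally accepts nested dicts and lists. A small sketch of that behaviour, condensed from the test cases later in this compare (the task fields are placeholders):

```python
from crewai import Task

task = Task(
    description="Illustrative only",
    expected_output="Illustrative only: {items}",
)

# Plain placeholder substitution (supported by both signatures).
assert task.interpolate_only("Hello {name}!", {"name": "Ada"}) == "Hello Ada!"

# The wider signature also accepts dict/list values; they are rendered with str().
inputs = {"items": ["apple", "banana"]}
assert task.interpolate_only("Available: {items}", inputs) == f"Available: {inputs['items']}"
```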
@@ -92,34 +92,13 @@ class TaskEvaluator:
"""

output_training_data = training_data[agent_id]

final_aggregated_data = ""

for iteration, data in output_training_data.items():
improved_output = data.get("improved_output")
initial_output = data.get("initial_output")
human_feedback = data.get("human_feedback")

if not all([improved_output, initial_output, human_feedback]):
missing_fields = [
field
for field in ["improved_output", "initial_output", "human_feedback"]
if not data.get(field)
]
error_msg = (
f"Critical training data error: Missing fields ({', '.join(missing_fields)}) "
f"for agent {agent_id} in iteration {iteration}.\n"
"This indicates a broken training process. "
"Cannot proceed with evaluation.\n"
"Please check your training implementation."
)
raise ValueError(error_msg)

for _, data in output_training_data.items():
final_aggregated_data += (
f"Iteration: {iteration}\n"
f"Initial Output:\n{initial_output}\n\n"
f"Human Feedback:\n{human_feedback}\n\n"
f"Improved Output:\n{improved_output}\n\n"
"------------------------------------------------\n\n"
f"Initial Output:\n{data.get('initial_output', '')}\n\n"
f"Human Feedback:\n{data.get('human_feedback', '')}\n\n"
f"Improved Output:\n{data.get('improved_output', '')}\n\n"
)

evaluation_query = (
@@ -53,7 +53,6 @@ def create_llm(
timeout: Optional[float] = getattr(llm_value, "timeout", None)
api_key: Optional[str] = getattr(llm_value, "api_key", None)
base_url: Optional[str] = getattr(llm_value, "base_url", None)
api_base: Optional[str] = getattr(llm_value, "api_base", None)

created_llm = LLM(
model=model,

@@ -63,7 +62,6 @@ def create_llm(
timeout=timeout,
api_key=api_key,
base_url=base_url,
api_base=api_base,
)
return created_llm
except Exception as e:

@@ -103,18 +101,8 @@ def _llm_via_environment_or_fallback() -> Optional[LLM]:
callbacks: List[Any] = []

# Optional base URL from env
base_url = (
os.environ.get("BASE_URL")
or os.environ.get("OPENAI_API_BASE")
or os.environ.get("OPENAI_BASE_URL")
)

api_base = os.environ.get("API_BASE") or os.environ.get("AZURE_API_BASE")

# Synchronize base_url and api_base if one is populated and the other is not
if base_url and not api_base:
api_base = base_url
elif api_base and not base_url:
api_base = os.environ.get("OPENAI_API_BASE") or os.environ.get("OPENAI_BASE_URL")
if api_base:
base_url = api_base

# Initialize llm_params dictionary

@@ -127,7 +115,6 @@ def _llm_via_environment_or_fallback() -> Optional[LLM]:
"timeout": timeout,
"api_key": api_key,
"base_url": base_url,
"api_base": api_base,
"api_version": api_version,
"presence_penalty": presence_penalty,
"frequency_penalty": frequency_penalty,
@@ -1,5 +1,3 @@
import os

from crewai.utilities.file_handler import PickleHandler

@@ -31,10 +29,3 @@ class CrewTrainingHandler(PickleHandler):
data[agent_id] = {train_iteration: new_data}

self.save(data)

def clear(self) -> None:
"""Clear the training data by removing the file or resetting its contents."""
if os.path.exists(self.file_path):
with open(self.file_path, "wb") as file:
# Overwrite with an empty dictionary
self.save({})
@@ -779,43 +779,6 @@ def test_interpolate_only():
assert result == no_placeholders


def test_interpolate_only_with_dict_inside_expected_output():
"""Test the interpolate_only method for various scenarios including JSON structure preservation."""
task = Task(
description="Unused in this test",
expected_output="Unused in this test: {questions}",
)

json_string = '{"questions": {"main_question": "What is the user\'s name?", "secondary_question": "What is the user\'s age?"}}'
result = task.interpolate_only(
input_string=json_string,
inputs={
"questions": {
"main_question": "What is the user's name?",
"secondary_question": "What is the user's age?",
}
},
)
assert '"main_question": "What is the user\'s name?"' in result
assert '"secondary_question": "What is the user\'s age?"' in result
assert result == json_string

normal_string = "Hello {name}, welcome to {place}!"
result = task.interpolate_only(
input_string=normal_string, inputs={"name": "John", "place": "CrewAI"}
)
assert result == "Hello John, welcome to CrewAI!"

result = task.interpolate_only(input_string="", inputs={"unused": "value"})
assert result == ""

no_placeholders = "Hello, this is a test"
result = task.interpolate_only(
input_string=no_placeholders, inputs={"unused": "value"}
)
assert result == no_placeholders


def test_task_output_str_with_pydantic():
from crewai.tasks.output_format import OutputFormat
@@ -1003,283 +966,3 @@ def test_task_execution_times():
assert task.start_time is not None
assert task.end_time is not None
assert task.execution_duration == (task.end_time - task.start_time).total_seconds()


def test_interpolate_with_list_of_strings():
task = Task(
description="Test list interpolation",
expected_output="List: {items}",
)

# Test simple list of strings
input_str = "Available items: {items}"
inputs = {"items": ["apple", "banana", "cherry"]}
result = task.interpolate_only(input_str, inputs)
assert result == f"Available items: {inputs['items']}"

# Test empty list
empty_list_input = {"items": []}
result = task.interpolate_only(input_str, empty_list_input)
assert result == "Available items: []"


def test_interpolate_with_list_of_dicts():
task = Task(
description="Test list of dicts interpolation",
expected_output="People: {people}",
)

input_data = {
"people": [
{"name": "Alice", "age": 30, "skills": ["Python", "AI"]},
{"name": "Bob", "age": 25, "skills": ["Java", "Cloud"]},
]
}
result = task.interpolate_only("{people}", input_data)

parsed_result = eval(result)
assert isinstance(parsed_result, list)
assert len(parsed_result) == 2
assert parsed_result[0]["name"] == "Alice"
assert parsed_result[0]["age"] == 30
assert parsed_result[0]["skills"] == ["Python", "AI"]
assert parsed_result[1]["name"] == "Bob"
assert parsed_result[1]["age"] == 25
assert parsed_result[1]["skills"] == ["Java", "Cloud"]


def test_interpolate_with_nested_structures():
task = Task(
description="Test nested structures",
expected_output="Company: {company}",
)

input_data = {
"company": {
"name": "TechCorp",
"departments": [
{
"name": "Engineering",
"employees": 50,
"tools": ["Git", "Docker", "Kubernetes"],
},
{"name": "Sales", "employees": 20, "regions": {"north": 5, "south": 3}},
],
}
}
result = task.interpolate_only("{company}", input_data)
parsed = eval(result)

assert parsed["name"] == "TechCorp"
assert len(parsed["departments"]) == 2
assert parsed["departments"][0]["tools"] == ["Git", "Docker", "Kubernetes"]
assert parsed["departments"][1]["regions"]["north"] == 5


def test_interpolate_with_special_characters():
task = Task(
description="Test special characters in dicts",
expected_output="Data: {special_data}",
)

input_data = {
"special_data": {
"quotes": """This has "double" and 'single' quotes""",
"unicode": "文字化けテスト",
"symbols": "!@#$%^&*()",
"empty": "",
}
}
result = task.interpolate_only("{special_data}", input_data)
parsed = eval(result)

assert parsed["quotes"] == """This has "double" and 'single' quotes"""
assert parsed["unicode"] == "文字化けテスト"
assert parsed["symbols"] == "!@#$%^&*()"
assert parsed["empty"] == ""


def test_interpolate_mixed_types():
task = Task(
description="Test mixed type interpolation",
expected_output="Mixed: {data}",
)

input_data = {
"data": {
"name": "Test Dataset",
"samples": 1000,
"features": ["age", "income", "location"],
"metadata": {
"source": "public",
"validated": True,
"tags": ["demo", "test", "temp"],
},
}
}
result = task.interpolate_only("{data}", input_data)
parsed = eval(result)

assert parsed["name"] == "Test Dataset"
assert parsed["samples"] == 1000
assert parsed["metadata"]["tags"] == ["demo", "test", "temp"]


def test_interpolate_complex_combination():
task = Task(
description="Test complex combination",
expected_output="Report: {report}",
)

input_data = {
"report": [
{
"month": "January",
"metrics": {"sales": 15000, "expenses": 8000, "profit": 7000},
"top_products": ["Product A", "Product B"],
},
{
"month": "February",
"metrics": {"sales": 18000, "expenses": 8500, "profit": 9500},
"top_products": ["Product C", "Product D"],
},
]
}
result = task.interpolate_only("{report}", input_data)
parsed = eval(result)

assert len(parsed) == 2
assert parsed[0]["month"] == "January"
assert parsed[1]["metrics"]["profit"] == 9500
assert "Product D" in parsed[1]["top_products"]


def test_interpolate_invalid_type_validation():
task = Task(
description="Test invalid type validation",
expected_output="Should never reach here",
)

# Test with invalid top-level type
with pytest.raises(ValueError) as excinfo:
task.interpolate_only("{data}", {"data": set()}) # type: ignore we are purposely testing this failure

assert "Unsupported type set" in str(excinfo.value)

# Test with invalid nested type
invalid_nested = {
"profile": {
"name": "John",
"age": 30,
"tags": {"a", "b", "c"}, # Set is invalid
}
}
with pytest.raises(ValueError) as excinfo:
task.interpolate_only("{data}", {"data": invalid_nested})
assert "Unsupported type set" in str(excinfo.value)


def test_interpolate_custom_object_validation():
task = Task(
description="Test custom object rejection",
expected_output="Should never reach here",
)

class CustomObject:
def __init__(self, value):
self.value = value

def __str__(self):
return str(self.value)

# Test with custom object at top level
with pytest.raises(ValueError) as excinfo:
task.interpolate_only("{obj}", {"obj": CustomObject(5)}) # type: ignore we are purposely testing this failure
assert "Unsupported type CustomObject" in str(excinfo.value)

# Test with nested custom object in dictionary
with pytest.raises(ValueError) as excinfo:
task.interpolate_only(
"{data}", {"data": {"valid": 1, "invalid": CustomObject(5)}}
)
assert "Unsupported type CustomObject" in str(excinfo.value)

# Test with nested custom object in list
with pytest.raises(ValueError) as excinfo:
task.interpolate_only("{data}", {"data": [1, "valid", CustomObject(5)]})
assert "Unsupported type CustomObject" in str(excinfo.value)

# Test with deeply nested custom object
with pytest.raises(ValueError) as excinfo:
task.interpolate_only(
"{data}", {"data": {"level1": {"level2": [{"level3": CustomObject(5)}]}}}
)
assert "Unsupported type CustomObject" in str(excinfo.value)


def test_interpolate_valid_complex_types():
task = Task(
description="Test valid complex types",
expected_output="Validation should pass",
)

# Valid complex structure
valid_data = {
"name": "Valid Dataset",
"stats": {
"count": 1000,
"distribution": [0.2, 0.3, 0.5],
"features": ["age", "income"],
"nested": {"deep": [1, 2, 3], "deeper": {"a": 1, "b": 2.5}},
},
}

# Should not raise any errors
result = task.interpolate_only("{data}", {"data": valid_data})
parsed = eval(result)
assert parsed["name"] == "Valid Dataset"
assert parsed["stats"]["nested"]["deeper"]["b"] == 2.5


def test_interpolate_edge_cases():
task = Task(
description="Test edge cases",
expected_output="Edge case handling",
)

# Test empty dict and list
assert task.interpolate_only("{}", {"data": {}}) == "{}"
assert task.interpolate_only("[]", {"data": []}) == "[]"

# Test numeric types
assert task.interpolate_only("{num}", {"num": 42}) == "42"
assert task.interpolate_only("{num}", {"num": 3.14}) == "3.14"

# Test boolean values (valid JSON types)
assert task.interpolate_only("{flag}", {"flag": True}) == "True"
assert task.interpolate_only("{flag}", {"flag": False}) == "False"


def test_interpolate_valid_types():
task = Task(
description="Test valid types including null and boolean",
expected_output="Should pass validation",
)

# Test with boolean and null values (valid JSON types)
valid_data = {
"name": "Test",
"active": True,
"deleted": False,
"optional": None,
"nested": {"flag": True, "empty": None},
}

result = task.interpolate_only("{data}", {"data": valid_data})
parsed = eval(result)

assert parsed["active"] is True
assert parsed["deleted"] is False
assert parsed["optional"] is None
assert parsed["nested"]["flag"] is True
assert parsed["nested"]["empty"] is None
@@ -48,9 +48,9 @@ def test_evaluate_training_data(converter_mock):
mock.call(
llm=original_agent.llm,
text="Assess the quality of the training data based on the llm output, human feedback , and llm "
"output improved result.\n\nIteration: data1\nInitial Output:\nInitial output 1\n\nHuman Feedback:\nHuman feedback "
"1\n\nImproved Output:\nImproved output 1\n\n------------------------------------------------\n\nIteration: data2\nInitial Output:\nInitial output 2\n\nHuman "
"Feedback:\nHuman feedback 2\n\nImproved Output:\nImproved output 2\n\n------------------------------------------------\n\nPlease provide:\n- Provide "
"output improved result.\n\nInitial Output:\nInitial output 1\n\nHuman Feedback:\nHuman feedback "
"1\n\nImproved Output:\nImproved output 1\n\nInitial Output:\nInitial output 2\n\nHuman "
"Feedback:\nHuman feedback 2\n\nImproved Output:\nImproved output 2\n\nPlease provide:\n- Provide "
"a list of clear, actionable instructions derived from the Human Feedbacks to enhance the Agent's "
"performance. Analyze the differences between Initial Outputs and Improved Outputs to generate specific "
"action items for future tasks. Ensure all key and specificpoints from the human feedback are "