Mirror of https://github.com/crewAIInc/crewAI.git
Synced 2026-05-03 00:02:36 +00:00

Commit: improved
@@ -1,4 +1,4 @@
-from typing import Any, Callable, Dict, List, Optional, Union, cast
+from typing import Any, Callable, Dict, List, Optional, Union, cast, Tuple
import json

from crewai.agents.agent_builder.base_agent import BaseAgent
@@ -10,6 +10,7 @@ from crewai.agents.parser import (
    OutputParserException,
)
from crewai.agents.tools_handler import ToolsHandler
from crewai.tools.agent_tools.scratchpad_tool import ScratchpadTool
from crewai.llm import BaseLLM
from crewai.tools.base_tool import BaseTool
from crewai.tools.structured_tool import CrewStructuredTool
@@ -19,6 +20,7 @@ from crewai.utilities.agent_utils import (
    enforce_rpm_limit,
    format_message_for_llm,
    get_llm_response,
    get_tool_names,
    handle_agent_action_core,
    handle_context_length,
    handle_max_iterations_exceeded,
@@ -27,7 +29,9 @@ from crewai.utilities.agent_utils import (
    has_reached_max_iterations,
    is_context_length_exceeded,
    process_llm_response,
    render_text_description_and_args,
    show_agent_logs,
    parse_tools,
)
from crewai.utilities.constants import MAX_LLM_RETRY, TRAINING_DATA_FILE
from crewai.utilities.logger import Logger
@@ -89,6 +93,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
        self.agent_state: AgentState = AgentState(
            task_id=str(task.id) if task else None
        )
        self.scratchpad_tool: Optional[ScratchpadTool] = None
        existing_stop = self.llm.stop or []
        self.llm.stop = list(
            set(
@@ -98,6 +103,30 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
            )
        )

        # Initialize scratchpad tool if reasoning is enabled
        if hasattr(self.agent, "reasoning") and self.agent.reasoning:
            self._initialize_scratchpad_tool()

    def _initialize_scratchpad_tool(self) -> None:
        """Initialize the scratchpad tool and add it to available tools."""
        self.scratchpad_tool = ScratchpadTool(scratchpad_data=self.agent_state.scratchpad)

        # Add to tools list if not already present
        tool_names = [tool.name for tool in self.tools]
        if self.scratchpad_tool.name not in tool_names:
            # Use parse_tools to convert to CrewStructuredTool
            parsed_scratchpad_tools = parse_tools([self.scratchpad_tool])
            if parsed_scratchpad_tools:
                structured_scratchpad_tool = parsed_scratchpad_tools[0]
                self.tools.append(structured_scratchpad_tool)

                # Update tool mappings
                self.tool_name_to_tool_map[self.scratchpad_tool.name] = structured_scratchpad_tool

                # Update tools names and descriptions
                self.tools_names = get_tool_names(self.tools)
                self.tools_description = render_text_description_and_args(self.tools)

    def invoke(self, inputs: Dict[str, str]) -> Dict[str, Any]:
        # Reset agent state for new task execution
        self.agent_state.reset(task_id=str(self.task.id) if self.task else None)
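Note: ScratchpadTool ships in crewai.tools.agent_tools.scratchpad_tool and its source is not part of this diff. A minimal hypothetical stand-in, inferred only from the calls the executor makes on it (scratchpad_data=..., .name, .description, update_scratchpad(...)), assuming BaseTool's usual pydantic-style fields:

    from typing import Any, Dict
    from pydantic import Field
    from crewai.tools.base_tool import BaseTool

    class MinimalScratchpadTool(BaseTool):
        """Hypothetical stand-in mirroring the calls made by CrewAgentExecutor."""
        name: str = "Access Scratchpad Memory"
        description: str = "Read a stored value from the agent's scratchpad by key."
        scratchpad_data: Dict[str, Any] = Field(default_factory=dict)

        def _run(self, key: str) -> Any:
            # Return the stored entry, or list the available keys on a miss.
            if key in self.scratchpad_data:
                return self.scratchpad_data[key]
            return f"Key '{key}' not found. Available keys: {list(self.scratchpad_data)}"

        def update_scratchpad(self, data: Dict[str, Any]) -> None:
            # The executor calls this after each step so the tool sees fresh state.
            self.scratchpad_data = data
            self.description = (
                "Read a stored value from the agent's scratchpad by key. "
                f"Available keys: {list(self.scratchpad_data)}"
            )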
@@ -204,7 +233,10 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
        formatted_answer = None
        while not isinstance(formatted_answer, AgentFinish):
            try:
                print(f"\n[DEBUG] Starting iteration {self.iterations + 1}, max_iter: {self.max_iter}")

                if has_reached_max_iterations(self.iterations, self.max_iter):
                    print(f"[DEBUG] Max iterations reached")
                    formatted_answer = handle_max_iterations_exceeded(
                        formatted_answer,
                        printer=self._printer,
@@ -216,15 +248,67 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):

                enforce_rpm_limit(self.request_within_rpm_limit)

                print(f"[DEBUG] About to call LLM with {len(self.messages)} messages")
                answer = get_llm_response(
                    llm=self.llm,
                    messages=self.messages,
                    callbacks=self.callbacks,
                    printer=self._printer,
                )
                print(f"[DEBUG] LLM response received: {answer[:100]}..." if answer else "[DEBUG] No LLM response")

                formatted_answer = process_llm_response(answer, self.use_stop_words)
                print(f"[DEBUG] Formatted answer type: {type(formatted_answer).__name__}")

                # Check if agent is trying to finish but hasn't met criteria
                if isinstance(formatted_answer, AgentFinish):
                    print(f"[DEBUG] Agent trying to finish - checking acceptance criteria")
                    # Validate acceptance criteria if reasoning is enabled and criteria exist
                    if (hasattr(self.agent, "reasoning") and self.agent.reasoning
                            and self.agent_state.acceptance_criteria):

                        self._printer.print(
                            content="\nValidating acceptance criteria before finalizing...",
                            color="cyan"
                        )

                        print(f"[DEBUG] Starting validation of {len(self.agent_state.acceptance_criteria)} criteria")
                        is_valid, unmet_criteria = self._validate_acceptance_criteria(formatted_answer.output)
                        print(f"[DEBUG] Validation result: is_valid={is_valid}, unmet={len(unmet_criteria)}")

                        if not is_valid:
                            # Prevent task completion and force retry
                            self._printer.print(
                                content=f"\n❌ Cannot finalize - {len(unmet_criteria)} acceptance criteria not met:",
                                color="red"
                            )
                            for criterion in unmet_criteria:
                                self._printer.print(
                                    content=f" • {criterion}",
                                    color="yellow"
                                )

                            # Create retry prompt
                            print(f"[DEBUG] Creating criteria retry prompt")
                            retry_prompt = self._create_criteria_retry_prompt(unmet_criteria)

                            # Add retry prompt to messages
                            self._append_message(retry_prompt, role="user")

                            # Force another iteration by resetting formatted_answer
                            formatted_answer = None
                            print(f"[DEBUG] Forcing another iteration due to unmet criteria")

                            # Continue the loop
                            continue
                        else:
                            self._printer.print(
                                content="\n✅ All acceptance criteria met!",
                                color="green"
                            )

                if isinstance(formatted_answer, AgentAction):
                    print(f"[DEBUG] Agent action: tool={formatted_answer.tool}")
                    # Extract agent fingerprint if available
                    fingerprint_context = {}
                    if (
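Note: the acceptance-criteria branch above boils down to a gate pattern: an AgentFinish leaves the loop only if validation passes; otherwise feedback is appended to the conversation and the loop continues. A self-contained sketch of that control flow (all callables stubbed, names hypothetical):

    from typing import Callable, List, Tuple

    def run_with_finish_gate(
        step: Callable[[List[str]], str],          # one LLM turn: messages -> candidate answer
        validate: Callable[[str], Tuple[bool, List[str]]],
        make_retry_prompt: Callable[[List[str]], str],
        max_iters: int = 10,
    ) -> str:
        messages: List[str] = []
        for _ in range(max_iters):
            candidate = step(messages)
            ok, unmet = validate(candidate)
            if ok:
                return candidate                       # gate passed: accept the final answer
            messages.append(make_retry_prompt(unmet))  # gate failed: feed criteria back
        return candidate                               # fall through on iteration budget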
@@ -238,6 +322,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                        )
                    }

                    print(f"[DEBUG] Executing tool: {formatted_answer.tool}")
                    tool_result = execute_tool_and_check_finality(
                        agent_action=formatted_answer,
                        fingerprint_context=fingerprint_context,
@@ -250,6 +335,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                        agent=self.agent,
                        function_calling_llm=self.function_calling_llm,
                    )
                    print(f"[DEBUG] Tool execution completed")

                    formatted_answer = self._handle_agent_action(
                        formatted_answer, tool_result
                    )
@@ -297,7 +384,10 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                        hasattr(self.agent, "reasoning")
                        and self.agent.reasoning
                        and tool_result
                        and formatted_answer.tool != "Access Scratchpad Memory"  # Skip scratchpad tool itself
                        and self._is_tool_execution_successful(tool_result)  # Only for successful executions
                    ):
                        print(f"[DEBUG] Starting scratchpad extraction for {formatted_answer.tool}")
                        self._extract_tool_result_to_scratchpad(
                            tool_name=formatted_answer.tool,
                            tool_args=tool_args,
@@ -319,8 +409,15 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                # Increment steps in agent state
                self.agent_state.increment_steps()

                # Update scratchpad tool if it exists
                if self.scratchpad_tool and self.agent_state.scratchpad:
                    print(f"[DEBUG] Updating scratchpad tool")
                    self._update_scratchpad_tool()

                if self._should_trigger_reasoning():
                    print(f"[DEBUG] Triggering mid-execution reasoning")
                    self._handle_mid_execution_reasoning()
                    print(f"[DEBUG] Mid-execution reasoning completed")
                else:
                    self.steps_since_reasoning += 1
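Note: _should_trigger_reasoning and the steps_since_reasoning counter are defined outside this hunk. The increment above suggests a simple step-count cadence, roughly (function and interval here are a hypothetical reconstruction, not the committed implementation):

    def should_trigger_reasoning(steps_since_reasoning: int, interval: int = 5) -> bool:
        # Hypothetical: re-plan every `interval` tool steps. The committed
        # _should_trigger_reasoning may also weigh failures or scratchpad growth.
        return steps_since_reasoning >= interval

    assert should_trigger_reasoning(5)
    assert not should_trigger_reasoning(2)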
@@ -328,6 +425,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                self._append_message(formatted_answer.text, role="assistant")

            except OutputParserException as e:
                print(f"[DEBUG] OutputParserException: {str(e)}")
                formatted_answer = handle_output_parser_exception(
                    e=e,
                    messages=self.messages,
@@ -337,10 +435,12 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                )

            except Exception as e:
                print(f"[DEBUG] Exception in invoke loop: {type(e).__name__}: {str(e)}")
                if e.__class__.__module__.startswith("litellm"):
                    # Do not retry on litellm errors
                    raise e
                if is_context_length_exceeded(e):
                    print(f"[DEBUG] Context length exceeded, handling...")
                    handle_context_length(
                        respect_context_window=self.respect_context_window,
                        printer=self._printer,
@@ -357,12 +457,14 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                    raise e
            finally:
                self.iterations += 1
                print(f"[DEBUG] Iteration {self.iterations} completed")

        # During the invoke loop, formatted_answer alternates between AgentAction
        # (when the agent is using tools) and eventually becomes AgentFinish
        # (when the agent reaches a final answer). This assertion confirms we've
        # reached a final answer and helps type checking understand this transition.
        assert isinstance(formatted_answer, AgentFinish)

        self._show_logs(formatted_answer)
        return formatted_answer

@@ -846,8 +948,44 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
            tool_args: Arguments that were passed to the tool
            tool_result: The result returned by the tool
        """
        print(f"[DEBUG] _extract_tool_result_to_scratchpad started for tool: {tool_name}")
        try:
            # Check result size and potentially skip LLM extraction for very large results
            result_str = str(tool_result.result)
            result_size = len(result_str)
            print(f"[DEBUG] Tool result size: {result_size} characters")

            # For very large results (>100KB), skip LLM extraction and store directly
            if result_size > 100000:
                print(f"[DEBUG] Result too large ({result_size} chars), storing directly without LLM extraction")
                scratchpad_key = tool_name.replace("_", "")

                # Try to parse as JSON if possible
                try:
                    if isinstance(tool_result.result, str):
                        result_data = json.loads(tool_result.result)
                    else:
                        result_data = tool_result.result
                except:
                    result_data = tool_result.result

                self.agent_state.add_to_scratchpad(
                    scratchpad_key,
                    {
                        "data": result_data,
                        "tool": tool_name,
                        "tool_args": tool_args,
                        "large_result": True,
                        "size": result_size
                    }
                )
                print(f"[DEBUG] Large result stored directly to scratchpad")
                return

            # Create a prompt for the LLM to extract relevant information
            result_preview = str(tool_result.result)[:200] + "..." if len(str(tool_result.result)) > 200 else str(tool_result.result)
            print(f"[DEBUG] Tool result preview: {result_preview}")

            extraction_prompt = f"""Given the following tool execution result, extract and summarize the most relevant information that would be useful for completing the current task.

Tool Name: {tool_name}
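Note: the large-result branch uses a parse-if-possible pattern so JSON strings stay structured in the scratchpad. Pulled out on its own (helper name is mine, and it is narrowed to JSONDecodeError where the commit uses a bare except):

    import json
    from typing import Any

    def coerce_json(value: Any) -> Any:
        # Decode JSON strings so structured data stays structured in the
        # scratchpad; anything that isn't valid JSON is kept as-is.
        if isinstance(value, str):
            try:
                return json.loads(value)
            except json.JSONDecodeError:
                return value
        return value

    assert coerce_json('{"ids": [1, 2, 3]}') == {"ids": [1, 2, 3]}
    assert coerce_json("plain text result") == "plain text result"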
@@ -866,6 +1004,7 @@ Instructions:
4. Summarize in a concise format (max 3-5 bullet points)
5. Focus on information that will be useful for subsequent steps
6. Generate a descriptive key name that explains what data is being stored (e.g., "email_and_thread_ids", "search_results", "file_contents", etc.)
7. IMPORTANT: When extracting data_points, include ALL items from lists or collections, do not truncate or summarize the data

Respond in the following JSON format:
{{
@@ -874,24 +1013,30 @@ Respond in the following JSON format:
    "data_points": {{"key": "value", ...}} or [list of items],
    "issues": ["issue1", "issue2", ...] or null if none,
    "relevance_score": 1-10 (how relevant this result is to the task)
-}}"""
+}}
+
+Note: For data_points, preserve the complete data structure. If it's a list of items (like email IDs, search results, etc.), include ALL items."""

        # Create messages for LLM call
        messages = [format_message_for_llm(extraction_prompt, role="user")]

        # Call LLM to extract information
        try:
            print(f"[DEBUG] Calling LLM for scratchpad extraction...")
            extraction_response = get_llm_response(
                llm=self.llm,
                messages=messages,
                callbacks=self.callbacks,
                printer=self._printer,
            )
            print(f"[DEBUG] LLM extraction response received, length: {len(extraction_response)}")

            # Try to parse the JSON response directly
            try:
                extracted_info = json.loads(extraction_response)
                print(f"[DEBUG] Successfully parsed JSON directly")
            except json.JSONDecodeError:
                print(f"[DEBUG] Failed to parse JSON directly, trying to extract from markdown...")
                # If direct parsing fails, try to extract JSON from the response
                # The LLM might have wrapped it in markdown code blocks or added extra text
                json_match = None
@@ -903,16 +1048,19 @@ Respond in the following JSON format:
                matches = re.findall(json_pattern, extraction_response, re.DOTALL)

                if matches:
                    print(f"[DEBUG] Found {len(matches)} JSON blocks in markdown")
                    # Try to parse the first match
                    for match in matches:
                        try:
                            json_match = json.loads(match)
                            print(f"[DEBUG] Successfully parsed JSON from markdown")
                            break
                        except json.JSONDecodeError:
                            continue

                # If no markdown JSON found, try to find raw JSON object
                if not json_match:
                    print(f"[DEBUG] No markdown JSON found, looking for raw JSON...")
                    # Look for JSON object in the response
                    json_start = extraction_response.find("{")
                    json_end = extraction_response.rfind("}")
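Note: json_pattern is defined just outside this hunk. A self-contained version of the same fenced-block-then-raw-slice extraction; the regex here is a reconstruction, not the committed one:

    import json
    import re

    def json_from_markdown(text: str):
        # Grab the bodies of ```json ... ``` (or bare ```) fences, then
        # fall back to the outermost { ... } slice of the raw text.
        pattern = r"```(?:json)?\s*(.*?)```"
        for block in re.findall(pattern, text, re.DOTALL):
            try:
                return json.loads(block)
            except json.JSONDecodeError:
                continue
        start, end = text.find("{"), text.rfind("}")
        if 0 <= start < end:
            try:
                return json.loads(text[start : end + 1])
            except json.JSONDecodeError:
                pass
        return None

    assert json_from_markdown('```json\n{"relevance_score": 9}\n```') == {"relevance_score": 9}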
@@ -926,13 +1074,16 @@ Respond in the following JSON format:
                                    json_start : json_end + 1
                                ]
                                json_match = json.loads(potential_json)
                                print(f"[DEBUG] Successfully extracted raw JSON")
                            except json.JSONDecodeError:
                                print(f"[DEBUG] Failed to parse raw JSON")
                                pass

                if json_match:
                    extracted_info = json_match
                else:
                    # Couldn't parse JSON, raise to trigger fallback
                    print(f"[DEBUG] Could not extract any valid JSON, triggering fallback")
                    raise json.JSONDecodeError(
                        "Could not extract JSON", extraction_response, 0
                    )
@@ -945,6 +1096,7 @@ Respond in the following JSON format:
            else:
                # Generate a meaningful key from tool name
                scratchpad_key = tool_name.replace("_", "")
            print(f"[DEBUG] Using scratchpad key: {scratchpad_key}")

            # Get the data points
            data_points = extracted_info.get("data_points", {})
@@ -965,9 +1117,12 @@ Respond in the following JSON format:

            # Store based on relevance score
            relevance_score = extracted_info.get("relevance_score", 0)
            print(f"[DEBUG] Relevance score: {relevance_score}")

            if relevance_score >= 7:
                # For high relevance, store just the data
                self.agent_state.add_to_scratchpad(scratchpad_key, data_to_store)
                print(f"[DEBUG] Stored high relevance data to scratchpad")
            else:
                # For lower relevance, include more context
                self.agent_state.add_to_scratchpad(
@@ -979,6 +1134,7 @@ Respond in the following JSON format:
                        "relevance": relevance_score,
                    },
                )
                print(f"[DEBUG] Stored lower relevance data with context to scratchpad")

            # Also store key findings if present and relevance is high
            if relevance_score >= 7 and extracted_info.get("key_findings"):
@@ -989,39 +1145,304 @@ Respond in the following JSON format:
                self.agent_state.add_to_scratchpad(
                    "key_findings", current_findings[-10:]
                )
                print(f"[DEBUG] Updated key findings in scratchpad")

-        except (json.JSONDecodeError, KeyError, TypeError):
+        except (json.JSONDecodeError, KeyError, TypeError) as e:
+            print(f"[DEBUG] Exception during extraction: {type(e).__name__}: {str(e)}")
            # Fallback for when we can't extract structured data
            # Try to generate a meaningful key name from tool name
            scratchpad_key = tool_name.replace("_", "")

-            # Store a preview of the result
+            # Store the complete result without truncation
            self.agent_state.add_to_scratchpad(
                scratchpad_key,
                {
-                    "raw_response": extraction_response[:500] + "..."
-                    if len(extraction_response) > 500
-                    else extraction_response,
-                    "tool_result_preview": str(tool_result.result)[:300] + "..."
-                    if len(str(tool_result.result)) > 300
-                    else str(tool_result.result),
+                    "raw_response": extraction_response,  # Store complete response
+                    "tool_result": tool_result.result,  # Store complete result
                    "extraction_failed": True,
                    "tool_args": tool_args
                },
            )
            print(f"[DEBUG] Stored fallback data to scratchpad")

        except Exception as e:
            # Log error but don't fail the entire execution
            print(f"[DEBUG] Failed to extract tool result: {type(e).__name__}: {str(e)}")
            self._printer.print(
                content=f"Failed to extract tool result to scratchpad: {str(e)}",
                color="yellow",
            )
-            # Still store basic information even if extraction fails
+            # Still store complete information even if extraction fails
            fallback_key = f"{tool_name}_raw_{self.agent_state.steps_completed}"
            self.agent_state.add_to_scratchpad(
                fallback_key,
                {
                    "error": f"Extraction failed: {str(e)}",
-                    "raw_preview": str(tool_result.result)[:200] + "..."
-                    if len(str(tool_result.result)) > 200
-                    else str(tool_result.result),
+                    "tool_result": tool_result.result,  # Store complete result
                    "tool_name": tool_name,
                    "tool_args": tool_args,
                    "raw_data": True
                },
            )
            print(f"[DEBUG] Stored error fallback data to scratchpad")

        print(f"[DEBUG] _extract_tool_result_to_scratchpad completed")
    def _update_scratchpad_tool(self) -> None:
        """Update the scratchpad tool with current state data."""
        if not self.scratchpad_tool:
            return

        # Update the tool's data
        self.scratchpad_tool.update_scratchpad(self.agent_state.scratchpad)

        # Find and update the tool in our tools list
        for i, tool in enumerate(self.tools):
            if hasattr(tool, 'name') and tool.name == self.scratchpad_tool.name:
                # Update the description on the existing tool reference
                if hasattr(tool, '_tool') and hasattr(tool._tool, 'description'):
                    tool._tool.description = self.scratchpad_tool.description
                elif hasattr(tool, 'description'):
                    tool.description = self.scratchpad_tool.description
                break

        # Regenerate tools description to reflect the updated tool
        self.tools_description = render_text_description_and_args(self.tools)
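Note: the final re-render matters because tools_description is a snapshot, not a live view, so mutating the tool alone would leave the prompt text stale. A small illustration with a simplified stand-in for render_text_description_and_args:

    class FakeTool:
        def __init__(self, name, description):
            self.name, self.description = name, description

    def render(tools):  # simplified stand-in for render_text_description_and_args
        return "\n".join(f"{t.name}: {t.description}" for t in tools)

    tools = [FakeTool("Access Scratchpad Memory", "keys: []")]
    snapshot = render(tools)                   # taken when tools were registered
    tools[0].description = "keys: ['emails']"  # mutating the live tool...
    assert "emails" not in snapshot            # ...does not refresh the snapshot
    snapshot = render(tools)                   # hence the re-render at the end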
    def _validate_acceptance_criteria(self, output: str) -> Tuple[bool, List[str]]:
        """Validate if the output meets acceptance criteria.

        Args:
            output: The final output to validate

        Returns:
            Tuple[bool, List[str]]: (is_valid, list of unmet criteria)
        """
        print(f"[DEBUG] _validate_acceptance_criteria started")
        if not self.agent_state.acceptance_criteria:
            # No criteria to validate
            print(f"[DEBUG] No acceptance criteria to validate")
            return True, []

        # Create a single prompt to check all criteria
        criteria_list = "\n".join(
            f"{i}. {criterion}"
            for i, criterion in enumerate(self.agent_state.acceptance_criteria, 1)
        )
        print(f"[DEBUG] Validating {len(self.agent_state.acceptance_criteria)} criteria")

        validation_prompt = f"""Given the following task output and acceptance criteria, identify which criteria have NOT been met.

Task Output:
{output}

Expected Output Description:
{self.task.expected_output if self.task else "Not specified"}

Acceptance Criteria:
{criteria_list}

For each criterion, determine if it has been met or not met in the output.
Respond with a JSON object where keys are criterion numbers (1, 2, 3, etc.) and values are:
- "MET" if the criterion is satisfied
- "NOT MET: <brief reason>" if the criterion is not satisfied

Example response format:
{{
    "1": "MET",
    "2": "NOT MET: Missing specific examples",
    "3": "MET"
}}
"""

        try:
            print(f"[DEBUG] Calling LLM for criteria validation...")
            response = self.llm.call([
                {"role": "user", "content": validation_prompt}
            ])
            print(f"[DEBUG] LLM validation response received")

            # Parse the response as JSON
            import json
            response_str = str(response).strip()

            # Try to extract JSON from the response
            json_start = response_str.find('{')
            json_end = response_str.rfind('}') + 1
            if json_start >= 0 and json_end > json_start:
                json_str = response_str[json_start:json_end]
                validation_results = json.loads(json_str)
                print(f"[DEBUG] Successfully parsed validation JSON")
            else:
                # Fallback if JSON not found
                self._logger.log("warning", f"Could not parse validation response as JSON: {response_str}")
                print(f"[DEBUG] Failed to parse validation response as JSON")
                # Assume all criteria not met if we can't parse
                return False, self.agent_state.acceptance_criteria

            # Process results
            unmet_criteria = []
            for i, criterion in enumerate(self.agent_state.acceptance_criteria, 1):
                result = validation_results.get(str(i), "NOT MET")
                if isinstance(result, str) and result.upper().startswith("NOT MET"):
                    unmet_criteria.append(criterion)
                    self._printer.print(
                        content=f"✗ Criterion not met: {criterion}",
                        color="yellow"
                    )
                else:
                    self._printer.print(
                        content=f"✓ Criterion met: {criterion}",
                        color="green"
                    )

            print(f"[DEBUG] Validation complete: {len(unmet_criteria)} unmet criteria")
            return len(unmet_criteria) == 0, unmet_criteria

        except Exception as e:
            print(f"[DEBUG] Error validating criteria: {type(e).__name__}: {str(e)}")
            self._logger.log("warning", f"Error validating criteria: {str(e)}")
            # If we can't validate, assume all criteria are not met to be safe
            return False, self.agent_state.acceptance_criteria
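Note: a self-contained illustration of the parsing step above, with a canned response standing in for self.llm.call (criteria and response text are made up):

    import json

    criteria = ["Lists every email", "Includes dates", "Has a summary per email"]
    canned = 'Here are my findings: {"1": "MET", "2": "NOT MET: dates missing", "3": "MET"}'

    # Same slice-out-the-JSON approach as the method above.
    s, e = canned.find("{"), canned.rfind("}") + 1
    results = json.loads(canned[s:e])

    unmet = [
        c for i, c in enumerate(criteria, 1)
        if str(results.get(str(i), "NOT MET")).upper().startswith("NOT MET")
    ]
    assert unmet == ["Includes dates"]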
    def _create_criteria_retry_prompt(self, unmet_criteria: List[str]) -> str:
        """Create a prompt to retry task with unmet criteria.

        Args:
            unmet_criteria: List of criteria that weren't met

        Returns:
            str: The retry prompt
        """
        # Get task context
        task_description = self.task.description if self.task else "Not specified"
        expected_output = self.task.expected_output if self.task else "Not specified"

        # Build information about what's in the scratchpad
        scratchpad_info = ""
        scratchpad_data_summary = ""
        if self.scratchpad_tool and self.agent_state.scratchpad:
            scratchpad_keys = list(self.agent_state.scratchpad.keys())
            scratchpad_info = f"""
📦 YOUR SCRATCHPAD CONTAINS DATA:
{chr(10).join(f" • '{key}'" for key in scratchpad_keys)}

TO ACCESS THIS DATA: Use the "Access Scratchpad Memory" tool with the key name.
Example:
Action: Access Scratchpad Memory
Action Input: {{"key": "{scratchpad_keys[0] if scratchpad_keys else 'key_name'}"}}
"""
            # Add summary of what's in scratchpad
            for key in scratchpad_keys[:3]:  # Show first 3 keys as examples
                value = self.agent_state.scratchpad[key]
                if isinstance(value, list):
                    scratchpad_data_summary += f"\n - '{key}': contains {len(value)} items"
                elif isinstance(value, dict):
                    scratchpad_data_summary += f"\n - '{key}': contains data with {len(value)} fields"
                else:
                    scratchpad_data_summary += f"\n - '{key}': contains stored data"

        # Analyze what's missing based on criteria
        missing_data_hints = []
        for criterion in unmet_criteria:
            criterion_lower = criterion.lower()
            if "every email" in criterion_lower or "all" in criterion_lower:
                missing_data_hints.append("You need to retrieve ALL emails, not just a summary")
            if "date" in criterion_lower or "time" in criterion_lower:
                missing_data_hints.append("Include complete date/time information for each record")
            if "subject" in criterion_lower or "sender" in criterion_lower or "recipients" in criterion_lower:
                missing_data_hints.append("Ensure all email metadata (subject, sender, recipients) is included")
            if "format" in criterion_lower or "list" in criterion_lower:
                missing_data_hints.append("Format the data properly as requested")
            if "summary" in criterion_lower or "concise" in criterion_lower:
                missing_data_hints.append("Include a concise summary/snippet for each email")

        # Get available tools (excluding scratchpad tool)
        available_tools = [tool for tool in self.tools_names.split(", ") if tool != "Access Scratchpad Memory"]
        tools_hint = f"\n🛠️ AVAILABLE TOOLS: {', '.join(available_tools)}" if available_tools else ""

        # Get progress summary
        progress_summary = f"""
📊 CURRENT PROGRESS:
- Steps completed: {self.agent_state.steps_completed}
- Tools used: {len(self.agent_state.tool_usage_history)} times"""

        if self.agent_state.tool_usage_history:
            recent_tools = self.agent_state.tool_usage_history[-3:]
            progress_summary += f"\n- Recent tools: {', '.join(t.tool_name for t in recent_tools)}"

        prompt = f"""❌ VALIDATION FAILED - YOU CANNOT PROVIDE A FINAL ANSWER YET!

Your output is INCOMPLETE and missing critical information.

🎯 ORIGINAL TASK:
{task_description}

📋 EXPECTED OUTPUT:
{expected_output}

❌ UNMET CRITERIA:
{chr(10).join(f"❌ {criterion}" for criterion in unmet_criteria)}

⚠️ CRITICAL: You MUST go back to using tools to gather the missing data!

DO NOT attempt another "Final Answer" until you have ALL required data.
{progress_summary}

🔧 REQUIRED ACTIONS:
1. STOP trying to provide a Final Answer
2. Switch to using Action/Action Input format
3. Use tools to gather the missing information
{scratchpad_info}

💡 WHAT YOU'RE MISSING:
{chr(10).join(f"• {hint}" for hint in missing_data_hints) if missing_data_hints else "• Review the criteria and gather all required data"}
{scratchpad_data_summary}

📋 YOUR NEXT STEP:
You MUST use the following format to continue:

Thought: I need to gather the missing data using tools
Action: [tool name]
Action Input: {{"parameter": "value"}}
{tools_hint}

⚠️ IMPORTANT REMINDERS:
- The task requires you to retrieve EVERY email, not just summaries
- You already have data in your scratchpad - ACCESS IT FIRST with "Access Scratchpad Memory"
- Each email needs: date, time, subject, sender, recipients, and content snippet
- Continue retrieving details for ALL emails until complete
- Only provide a Final Answer after you have gathered ALL required data

CONTINUE WITH TOOL USAGE NOW - DO NOT ATTEMPT ANOTHER FINAL ANSWER."""

        return prompt
    def _is_tool_execution_successful(self, tool_result: ToolResult) -> bool:
        """Check if a tool execution was successful based on the tool result."""
        if tool_result.result is None or tool_result.result == "":
            return False

        # Check for common error indicators in the result
        result_str = str(tool_result.result).lower()
        error_indicators = [
            "error",
            "exception",
            "failed",
            "unable to",
            "couldn't",
            "not found",
            "invalid",
            "wrong tool name",
            "don't exist",
            "tool usage exception",
            "moving on then",
            "has reached its usage limit"
        ]

        # If any error indicator is found in the result, consider it a failure
        for indicator in error_indicators:
            if indicator in result_str:
                return False

        return True
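Note: this substring heuristic will misclassify legitimate payloads that merely contain words like "error" or "invalid". A quick standalone demonstration (results are made up):

    def looks_successful(result: str) -> bool:
        # Same idea as _is_tool_execution_successful, shortened indicator list.
        indicators = ["error", "exception", "failed", "not found", "invalid"]
        lowered = result.lower()
        return bool(result) and not any(ind in lowered for ind in indicators)

    assert looks_successful('{"emails": 42}')
    # False positive: a real email subject that merely mentions an error
    assert not looks_successful("Subject: Fix for the billing error")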