Agent State Step 1

João Moura
2025-05-31 23:15:39 -07:00
parent e3cd7209ad
commit 7009a6b7a0
5 changed files with 579 additions and 54 deletions

View File

@@ -71,6 +71,7 @@ class Agent(BaseAgent):
"""
_times_executed: int = PrivateAttr(default=0)
_last_reasoning_output: Optional[Any] = PrivateAttr(default=None)
max_execution_time: Optional[int] = Field(
default=None,
description="Maximum execution time for an agent to execute a task",
@@ -388,6 +389,9 @@ class Agent(BaseAgent):
reasoning_output: AgentReasoningOutput = reasoning_handler.handle_agent_reasoning()
# Store the reasoning output for the executor to use
self._last_reasoning_output = reasoning_output
plan_text = reasoning_output.plan.plan
internal_plan_msg = (
@@ -483,6 +487,10 @@ class Agent(BaseAgent):
self,
event=AgentExecutionCompletedEvent(agent=self, task=task, output=result),
)
# Clean up reasoning output after task completion
self._last_reasoning_output = None
return result
def _execute_with_timeout(self, task_prompt: str, task: Task, timeout: int) -> str:
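
The hunks above implement a one-shot handoff: the agent stashes its reasoning output on a private attribute, the executor consumes it once, and the attribute is cleared when the task completes. A minimal standalone sketch of that lifecycle, using simplified stand-in names rather than the actual crewAI classes:

from typing import Any, Optional
from pydantic import BaseModel, PrivateAttr

class SketchAgent(BaseModel):
    """Stand-in for Agent; illustrates only the PrivateAttr handoff."""
    _last_reasoning_output: Optional[Any] = PrivateAttr(default=None)

    def reason(self) -> None:
        # In the real code this comes from AgentReasoning.handle_agent_reasoning()
        self._last_reasoning_output = {"plan": "1. Research topic\n2. Draft answer"}

    def execute_task(self) -> str:
        # The executor reads the stashed output exactly once
        output = self._last_reasoning_output
        try:
            return f"executed with: {output}"
        finally:
            # Mirrors the cleanup after AgentExecutionCompletedEvent above
            self._last_reasoning_output = None

agent = SketchAgent()
agent.reason()
print(agent.execute_task())
print(agent._last_reasoning_output)  # None: state does not leak into the next task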

View File

@@ -0,0 +1,200 @@
"""Agent state management for long-running tasks."""
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field
from datetime import datetime
class ToolUsage(BaseModel):
"""Record of a single tool usage."""
tool_name: str = Field(description="Name of the tool used")
arguments: Dict[str, Any] = Field(description="Arguments passed to the tool (may be truncated)")
result_summary: Optional[str] = Field(default=None, description="Brief summary of the tool's result")
timestamp: datetime = Field(default_factory=datetime.now, description="When the tool was used")
step_number: int = Field(description="Which execution step this tool was used in")
class AgentState(BaseModel):
"""Persistent state object for agent task execution.
This state object helps agents maintain coherence during long-running tasks
by tracking plans, progress, and intermediate results without relying solely
on conversation history.
"""
# Core fields
completed: bool = Field(
default=False,
description="Whether the current task is finished"
)
original_plan: List[str] = Field(
default_factory=list,
description="The initial plan from first reasoning pass. Never overwrite unless user requests complete replan"
)
last_plan: List[str] = Field(
default_factory=list,
description="The most recent plan (original or mid-execution update)"
)
acceptance_criteria: List[str] = Field(
default_factory=list,
description="Concrete goals to satisfy before marking completed=true"
)
scratchpad: Dict[str, Any] = Field(
default_factory=dict,
description="Agent-defined storage for intermediate results and metadata"
)
tool_usage_history: List[ToolUsage] = Field(
default_factory=list,
description="Detailed history of tool usage including arguments and results"
)
# Additional tracking fields
task_id: Optional[str] = Field(
default=None,
description="ID of the current task being executed"
)
created_at: datetime = Field(
default_factory=datetime.now,
description="When this state was created"
)
last_updated: datetime = Field(
default_factory=datetime.now,
description="When this state was last modified"
)
steps_completed: int = Field(
default=0,
description="Number of execution steps completed"
)
def update_last_plan(self, new_plan: List[str]) -> None:
"""Update the last plan and timestamp."""
self.last_plan = new_plan
self.last_updated = datetime.now()
def set_original_plan(self, plan: List[str]) -> None:
"""Set the original plan (only if not already set)."""
if not self.original_plan:
self.original_plan = plan
self.last_plan = plan
self.last_updated = datetime.now()
def add_to_scratchpad(self, key: str, value: Any) -> None:
"""Add or update a value in the scratchpad."""
self.scratchpad[key] = value
self.last_updated = datetime.now()
def record_tool_usage(
self,
tool_name: str,
arguments: Dict[str, Any],
result_summary: Optional[str] = None,
max_arg_length: int = 200
) -> None:
"""Record a tool usage with truncated arguments.
Args:
tool_name: Name of the tool used
arguments: Arguments passed to the tool
result_summary: Optional brief summary of the result
max_arg_length: Maximum length for string arguments before truncation
"""
# Truncate long string arguments to prevent state bloat
truncated_args = {}
for key, value in arguments.items():
if isinstance(value, str) and len(value) > max_arg_length:
truncated_args[key] = value[:max_arg_length] + "..."
elif isinstance(value, (list, dict)):
# For complex types, store a summary
truncated_args[key] = f"<{type(value).__name__} with {len(value)} items>"
else:
truncated_args[key] = value
tool_usage = ToolUsage(
tool_name=tool_name,
arguments=truncated_args,
result_summary=result_summary,
step_number=self.steps_completed
)
self.tool_usage_history.append(tool_usage)
self.last_updated = datetime.now()
def increment_steps(self) -> None:
"""Increment the step counter."""
self.steps_completed += 1
self.last_updated = datetime.now()
def mark_completed(self) -> None:
"""Mark the task as completed."""
self.completed = True
self.last_updated = datetime.now()
def reset(self, task_id: Optional[str] = None) -> None:
"""Reset state for a new task."""
self.completed = False
self.original_plan = []
self.last_plan = []
self.acceptance_criteria = []
self.scratchpad = {}
self.tool_usage_history = []
self.task_id = task_id
self.created_at = datetime.now()
self.last_updated = datetime.now()
self.steps_completed = 0
def to_context_string(self) -> str:
"""Generate a concise string representation for LLM context."""
context = f"Current State (Step {self.steps_completed}):\n"
context += f"- Task ID: {self.task_id}\n"
context += f"- Completed: {self.completed}\n"
if self.acceptance_criteria:
context += "- Acceptance Criteria:\n"
for criterion in self.acceptance_criteria:
context += f"{criterion}\n"
if self.last_plan:
context += "- Current Plan:\n"
for i, step in enumerate(self.last_plan, 1):
context += f" {i}. {step}\n"
if self.tool_usage_history:
context += "- Recent Tool Usage:\n"
# Show last 5 tool uses
recent_tools = self.tool_usage_history[-5:]
for usage in recent_tools:
context += f" • Step {usage.step_number}: {usage.tool_name}"
if usage.arguments:
args_preview = ", ".join(f"{k}={v}" for k, v in list(usage.arguments.items())[:2])
context += f"({args_preview})"
context += "\n"
if self.scratchpad:
context += "- Scratchpad:\n"
for key, value in self.scratchpad.items():
context += f"{key}: {value}\n"
return context
def get_tools_summary(self) -> Dict[str, Any]:
"""Get a summary of tool usage statistics."""
if not self.tool_usage_history:
return {"total_tool_uses": 0, "unique_tools": 0, "tools_by_frequency": {}}
tool_counts = {}
for usage in self.tool_usage_history:
tool_counts[usage.tool_name] = tool_counts.get(usage.tool_name, 0) + 1
return {
"total_tool_uses": len(self.tool_usage_history),
"unique_tools": len(set(usage.tool_name for usage in self.tool_usage_history)),
"tools_by_frequency": dict(sorted(tool_counts.items(), key=lambda x: x[1], reverse=True))
}
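
Since agent_state.py is brand new in this commit, a quick usage sketch may help. It assumes the module path used by the executor import further down (crewai.agents.agent_state); illustrative only, not part of the diff:

from crewai.agents.agent_state import AgentState

state = AgentState(task_id="task-123")
state.set_original_plan(["Search for sources", "Summarize findings"])
state.acceptance_criteria = ["Summary cites at least two sources"]

state.increment_steps()
state.record_tool_usage(
    tool_name="web_search",
    arguments={"query": "x" * 500},  # >200 chars, stored truncated with "..."
    result_summary="Found 3 relevant articles",
)

print(state.to_context_string())  # compact snapshot injected into LLM context
print(state.get_tools_summary())  # {'total_tool_uses': 1, 'unique_tools': 1, ...}
state.mark_completed()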

View File

@@ -1,8 +1,8 @@
from collections import deque
from typing import Any, Callable, Dict, List, Optional, Union, cast
from crewai.agents.agent_builder.base_agent import BaseAgent
from crewai.agents.agent_builder.base_agent_executor_mixin import CrewAgentExecutorMixin
from crewai.agents.agent_state import AgentState
from crewai.agents.parser import (
AgentAction,
AgentFinish,
@@ -84,8 +84,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
self.tool_name_to_tool_map: Dict[str, Union[CrewStructuredTool, BaseTool]] = {
tool.name: tool for tool in self.tools
}
self.tools_used: deque[str] = deque(maxlen=100) # Limit history size
self.steps_since_reasoning = 0
self.agent_state: AgentState = AgentState(task_id=str(task.id) if task else None)
existing_stop = self.llm.stop or []
self.llm.stop = list(
set(
@@ -96,6 +96,9 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
)
def invoke(self, inputs: Dict[str, str]) -> Dict[str, Any]:
# Reset agent state for new task execution
self.agent_state.reset(task_id=str(self.task.id) if self.task else None)
if "system" in self.prompt:
system_prompt = self._format_prompt(self.prompt.get("system", ""), inputs)
user_prompt = self._format_prompt(self.prompt.get("user", ""), inputs)
@@ -110,6 +113,10 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
self.ask_for_human_input = bool(inputs.get("ask_for_human_input", False))
try:
# Populate agent state from reasoning output if available
if hasattr(self.agent, "reasoning") and self.agent.reasoning:
self._populate_state_from_reasoning()
formatted_answer = self._invoke_loop()
except AssertionError:
self._printer.print(
@@ -128,11 +135,52 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
if self.ask_for_human_input:
formatted_answer = self._handle_human_feedback(formatted_answer)
# Mark task as completed in agent state
self.agent_state.mark_completed()
self._create_short_term_memory(formatted_answer)
self._create_long_term_memory(formatted_answer)
self._create_external_memory(formatted_answer)
return {"output": formatted_answer.output}
def _populate_state_from_reasoning(self) -> None:
"""Populate agent state from the reasoning output if available."""
try:
# Check if the agent has reasoning output from the initial reasoning
if hasattr(self.agent, '_last_reasoning_output') and self.agent._last_reasoning_output:
reasoning_output = self.agent._last_reasoning_output
# Extract structured plan if available
if reasoning_output.plan.structured_plan:
self.agent_state.set_original_plan(reasoning_output.plan.structured_plan.steps)
self.agent_state.acceptance_criteria = reasoning_output.plan.structured_plan.acceptance_criteria
elif reasoning_output.plan.plan:
# Fallback: try to extract steps from unstructured plan
plan_lines = [line.strip() for line in reasoning_output.plan.plan.split('\n') if line.strip()]
# Take meaningful lines that look like steps (skip headers, empty lines, etc.)
steps = []
for line in plan_lines:
if line and not line.startswith('###') and not line.startswith('**'):
steps.append(line)
if len(steps) >= 10: # Limit to 10 steps
break
if steps:
self.agent_state.set_original_plan(steps)
# Add state context to messages for coherence
if self.agent_state.original_plan:
state_context = f"Initial plan loaded with {len(self.agent_state.original_plan)} steps."
self._append_message(state_context, role="assistant")
# Clear the reasoning output to avoid using it again
self.agent._last_reasoning_output = None
except Exception as e:
self._printer.print(
content=f"Error populating state from reasoning: {str(e)}",
color="yellow",
)
def _invoke_loop(self) -> AgentFinish:
"""
Main loop to invoke the agent's thought process until it reaches a conclusion
@@ -191,6 +239,37 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
formatted_answer, tool_result
)
# Record detailed tool usage in agent state
if hasattr(formatted_answer, 'tool') and formatted_answer.tool:
# Extract tool arguments from the agent action
tool_args = {}
if hasattr(formatted_answer, 'tool_input') and formatted_answer.tool_input:
if isinstance(formatted_answer.tool_input, dict):
tool_args = formatted_answer.tool_input
elif isinstance(formatted_answer.tool_input, str):
# Try to parse JSON if it's a string
try:
import json
tool_args = json.loads(formatted_answer.tool_input)
except (json.JSONDecodeError, TypeError):
tool_args = {"input": formatted_answer.tool_input}
# Truncate result for summary
result_summary = None
if tool_result and hasattr(tool_result, 'result'):
result_str = str(tool_result.result)
result_summary = result_str[:200] + "..." if len(result_str) > 200 else result_str
# Record the tool usage with arguments
self.agent_state.record_tool_usage(
tool_name=formatted_answer.tool,
arguments=tool_args,
result_summary=result_summary
)
# Increment steps in agent state
self.agent_state.increment_steps()
if self._should_trigger_reasoning():
self._handle_mid_execution_reasoning()
else:
@@ -242,10 +321,6 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
self, formatted_answer: AgentAction, tool_result: ToolResult
) -> Union[AgentAction, AgentFinish]:
"""Handle the AgentAction, execute tools, and process the results."""
if hasattr(formatted_answer, 'tool') and formatted_answer.tool:
if formatted_answer.tool not in self.tools_used:
self.tools_used.append(formatted_answer.tool)
# Special case for add_image_tool
add_image_tool = self._i18n.tools("add_image")
if (
@@ -485,12 +560,30 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
current_progress = self._summarize_current_progress()
# Build detailed tools used list from agent state
tools_used_detailed = []
for usage in self.agent_state.tool_usage_history:
tool_desc = f"{usage.tool_name}"
if usage.arguments:
args_preview = ", ".join(f"{k}={v}" for k, v in list(usage.arguments.items())[:2])
tool_desc += f"({args_preview})"
tools_used_detailed.append(tool_desc)
# Get tool usage statistics and patterns
tool_stats = self.agent_state.get_tools_summary()
# Detect patterns in tool usage
tool_patterns = self._detect_tool_patterns()
if tool_patterns:
tool_stats['recent_patterns'] = tool_patterns
reasoning_handler = AgentReasoning(task=self.task, agent=cast(Agent, self.agent))
return reasoning_handler.should_adaptive_reason_llm(
current_steps=self.iterations,
tools_used=list(self.tools_used),
tools_used=tools_used_detailed,
current_progress=current_progress,
tool_usage_stats=tool_stats
)
except Exception as e:
self._printer.print(
@@ -499,16 +592,47 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
)
return False
def _has_recent_errors(self) -> bool:
"""Check for error indicators in recent messages."""
error_indicators = ["error", "exception", "failed", "unable to", "couldn't"]
recent_messages = self.messages[-3:] if len(self.messages) >= 3 else self.messages
for message in recent_messages:
content = message.get("content", "").lower()
if any(indicator in content for indicator in error_indicators):
return True
return False
def _detect_tool_patterns(self) -> Optional[str]:
"""
Detect patterns in recent tool usage that might indicate issues.
Returns:
Optional[str]: Description of detected patterns, or None
"""
if not self.agent_state.tool_usage_history:
return None
patterns = []
# Check for repeated use of the same tool with similar arguments
recent_tools = self.agent_state.tool_usage_history[-5:] if len(self.agent_state.tool_usage_history) >= 5 else self.agent_state.tool_usage_history
# Count consecutive uses of the same tool
if len(recent_tools) >= 2:
consecutive_count = 1
for i in range(1, len(recent_tools)):
if recent_tools[i].tool_name == recent_tools[i-1].tool_name:
consecutive_count += 1
if consecutive_count >= 3:
patterns.append(f"Same tool ({recent_tools[i].tool_name}) used {consecutive_count} times consecutively")
else:
consecutive_count = 1
# Check for tools with empty or error results
error_count = 0
for usage in recent_tools:
if usage.result_summary and any(keyword in usage.result_summary.lower()
for keyword in ['error', 'failed', 'not found', 'empty']):
error_count += 1
if error_count >= 2:
patterns.append(f"{error_count} tools returned errors or empty results recently")
# Check for rapid tool switching (might indicate confusion)
if len(set(usage.tool_name for usage in recent_tools)) == len(recent_tools) and len(recent_tools) >= 4:
patterns.append("Rapid switching between different tools without repetition")
return "; ".join(patterns) if patterns else None
def _handle_mid_execution_reasoning(self) -> None:
"""
@@ -522,21 +646,51 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
current_progress = self._summarize_current_progress()
# Include agent state in progress summary
state_info = f"\n\n{self.agent_state.to_context_string()}"
current_progress += state_info
from crewai.agent import Agent
reasoning_handler = AgentReasoning(task=self.task, agent=cast(Agent, self.agent))
# Build detailed tools used list from agent state
tools_used_detailed = []
for usage in self.agent_state.tool_usage_history:
tool_desc = f"{usage.tool_name}"
if usage.arguments:
args_preview = ", ".join(f"{k}={v}" for k, v in list(usage.arguments.items())[:2])
tool_desc += f"({args_preview})"
tools_used_detailed.append(tool_desc)
reasoning_output = reasoning_handler.handle_mid_execution_reasoning(
current_steps=self.iterations,
tools_used=list(self.tools_used),
tools_used=tools_used_detailed,
current_progress=current_progress,
iteration_messages=self.messages
)
# Update agent state with new plan if available
if reasoning_output.plan.structured_plan:
self.agent_state.update_last_plan(reasoning_output.plan.structured_plan.steps)
# Update acceptance criteria if they changed
if reasoning_output.plan.structured_plan.acceptance_criteria:
self.agent_state.acceptance_criteria = reasoning_output.plan.structured_plan.acceptance_criteria
# Add a note about the reasoning update to scratchpad
self.agent_state.add_to_scratchpad(
f"reasoning_update_{self.iterations}",
{
"reason": "Mid-execution reasoning triggered",
"updated_plan": bool(reasoning_output.plan.structured_plan)
}
)
updated_plan_msg = (
self._i18n.retrieve("reasoning", "mid_execution_reasoning_update").format(
plan=reasoning_output.plan.plan
) +
f"\n\nUpdated State:\n{self.agent_state.to_context_string()}" +
"\n\nRemember: strictly follow the updated plan above and ensure the final answer fully meets the EXPECTED OUTPUT criteria."
)
@@ -561,9 +715,25 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
summary = f"After {self.iterations} steps, "
if self.tools_used:
unique_tools = set(self.tools_used)
summary += f"I've used {len(self.tools_used)} tools ({', '.join(unique_tools)}). "
# Use tool usage history from agent state for better context
if self.agent_state.tool_usage_history:
tool_summary = self.agent_state.get_tools_summary()
summary += f"I've used {tool_summary['total_tool_uses']} tools ({tool_summary['unique_tools']} unique). "
# Include most frequently used tools
if tool_summary['tools_by_frequency']:
top_tools = list(tool_summary['tools_by_frequency'].items())[:3]
tools_str = ", ".join(f"{tool} ({count}x)" for tool, count in top_tools)
summary += f"Most used: {tools_str}. "
# Include details of the last tool use
if self.agent_state.tool_usage_history:
last_tool = self.agent_state.tool_usage_history[-1]
summary += f"Last tool: {last_tool.tool_name}"
if last_tool.arguments:
args_str = ", ".join(f"{k}={v}" for k, v in list(last_tool.arguments.items())[:2])
summary += f" with args ({args_str})"
summary += ". "
else:
summary += "I haven't used any tools yet. "
@@ -574,3 +744,14 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
summary += f"Most recent action: {last_message}"
return summary
def _has_recent_errors(self) -> bool:
"""Check for error indicators in recent messages."""
error_indicators = ["error", "exception", "failed", "unable to", "couldn't"]
recent_messages = self.messages[-3:] if len(self.messages) >= 3 else self.messages
for message in recent_messages:
content = message.get("content", "").lower()
if any(indicator in content for indicator in error_indicators):
return True
return False
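
The interplay between record_tool_usage, _detect_tool_patterns, and the adaptive-reasoning trigger is easiest to see with synthetic history. A sketch that reproduces the "same tool, repeated errors" situation the heuristics look for (illustrative; the checks below mirror, rather than call, the private executor method):

from crewai.agents.agent_state import AgentState

state = AgentState()
for _ in range(3):
    state.increment_steps()
    state.record_tool_usage(
        tool_name="file_reader",
        arguments={"path": "data.csv"},
        result_summary="Error: file not found",
    )

# Three consecutive uses of one tool, all returning errors: the exact
# combination _detect_tool_patterns reports and should_adaptive_reason_llm
# then receives inside tool_usage_stats["recent_patterns"].
recent = state.tool_usage_history[-5:]
consecutive = len({u.tool_name for u in recent}) == 1 and len(recent) >= 3
error_count = sum(
    1
    for u in recent
    if u.result_summary and "error" in u.result_summary.lower()
)
print(consecutive, error_count)  # True 3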

View File

@@ -55,12 +55,12 @@
"reasoning": {
"initial_plan": "You are {role}, a professional with the following background: {backstory}\n\nYour primary goal is: {goal}\n\nAs {role}, you are creating a strategic plan for a task that requires your expertise and unique perspective.",
"refine_plan": "You are {role}, a professional with the following background: {backstory}\n\nYour primary goal is: {goal}\n\nAs {role}, you are refining a strategic plan for a task that requires your expertise and unique perspective.",
"create_plan_prompt": "You are {role} with this background: {backstory}\n\nYour primary goal is: {goal}\n\nYou have been assigned the following task:\n{description}\n\nExpected output:\n{expected_output}\n\nAvailable tools: {tools}\n\nBefore executing this task, create a detailed plan that leverages your expertise as {role} and outlines:\n1. Your understanding of the task from your professional perspective\n2. The key steps you'll take to complete it, drawing on your background and skills\n3. How you'll approach any challenges that might arise, considering your expertise\n4. How you'll strategically use the available tools based on your experience, exactly what tools to use and how to use them\n5. The expected outcome and how it aligns with your goal\n\nRemember: your ultimate objective is to produce the most COMPLETE Final Answer that fully meets the **Expected output** criteria.\n\nAfter creating your plan, assess whether you feel ready to execute the task or if you could do better.\nConclude with one of these statements:\n- \"READY: I am ready to execute the task.\"\n- \"NOT READY: I need to refine my plan because [specific reason].\"",
"refine_plan_prompt": "You are {role} with this background: {backstory}\n\nYour primary goal is: {goal}\n\nYou created the following plan for this task:\n{current_plan}\n\nHowever, you indicated that you're not ready to execute the task yet.\n\nPlease refine your plan further, drawing on your expertise as {role} to address any gaps or uncertainties. As you refine your plan, be specific about which available tools you will use, how you will use them, and why they are the best choices for each step. Clearly outline your tool usage strategy as part of your improved plan.\n\nMake sure your refined strategy directly guides you toward producing the most COMPLETE Final Answer that fully satisfies the **Expected output**.\n\nAfter refining your plan, assess whether you feel ready to execute the task.\nConclude with one of these statements:\n- \"READY: I am ready to execute the task.\"\n- \"NOT READY: I need to refine my plan further because [specific reason].\"",
"create_plan_prompt": "You are {role} with this background: {backstory}\n\nYour primary goal is: {goal}\n\nYou have been assigned the following task:\n{description}\n\nExpected output:\n{expected_output}\n\nAvailable tools: {tools}\n\nBefore executing this task, create a detailed plan that leverages your expertise as {role} and outlines:\n1. Your understanding of the task from your professional perspective\n2. The key steps you'll take to complete it, drawing on your background and skills\n3. How you'll approach any challenges that might arise, considering your expertise\n4. How you'll strategically use the available tools based on your experience, exactly what tools to use and how to use them\n5. The expected outcome and how it aligns with your goal\n\nIMPORTANT: Structure your plan as follows:\n\nSTEPS:\n1. [First concrete action step]\n2. [Second concrete action step]\n3. [Continue with numbered steps...]\n\nACCEPTANCE CRITERIA:\n- [First criterion that must be met]\n- [Second criterion that must be met]\n- [Continue with criteria...]\n\nRemember: your ultimate objective is to produce the most COMPLETE Final Answer that fully meets the **Expected output** criteria.\n\nAfter creating your plan, assess whether you feel ready to execute the task or if you could do better.\nConclude with one of these statements:\n- \"READY: I am ready to execute the task.\"\n- \"NOT READY: I need to refine my plan because [specific reason].\"",
"refine_plan_prompt": "You are {role} with this background: {backstory}\n\nYour primary goal is: {goal}\n\nYou created the following plan for this task:\n{current_plan}\n\nHowever, you indicated that you're not ready to execute the task yet.\n\nPlease refine your plan further, drawing on your expertise as {role} to address any gaps or uncertainties. As you refine your plan, be specific about which available tools you will use, how you will use them, and why they are the best choices for each step. Clearly outline your tool usage strategy as part of your improved plan.\n\nIMPORTANT: Structure your refined plan as follows:\n\nSTEPS:\n1. [First concrete action step]\n2. [Second concrete action step]\n3. [Continue with numbered steps...]\n\nACCEPTANCE CRITERIA:\n- [First criterion that must be met]\n- [Second criterion that must be met]\n- [Continue with criteria...]\n\nMake sure your refined strategy directly guides you toward producing the most COMPLETE Final Answer that fully satisfies the **Expected output**.\n\nAfter refining your plan, assess whether you feel ready to execute the task.\nConclude with one of these statements:\n- \"READY: I am ready to execute the task.\"\n- \"NOT READY: I need to refine my plan further because [specific reason].\"",
"adaptive_reasoning_decision": "You are {role}, a professional with the following background: {backstory}\n\nYour primary goal is: {goal}\n\nAs {role}, you are currently executing a task and need to decide whether to pause and reassess your plan based on the current context.",
"mid_execution_reasoning": "You are currently executing a task and need to reassess your plan based on progress so far.\n\nTASK DESCRIPTION:\n{description}\n\nEXPECTED OUTPUT:\n{expected_output}\n\nCURRENT PROGRESS:\nSteps completed: {current_steps}\nTools used: {tools_used}\nProgress summary: {current_progress}\n\nRECENT CONVERSATION:\n{recent_messages}\n\nYour reassessment MUST focus on steering the remaining work toward a FINAL ANSWER that is as complete as possible and perfectly matches the **Expected output**.\n\nBased on the current progress and context, please reassess your plan for completing this task.\nConsider what has been accomplished, what challenges you've encountered, and what steps remain.\nAdjust your strategy if needed or confirm your current approach is still optimal.\n\nProvide a detailed updated plan for completing the task.\nEnd with \"READY: I am ready to continue executing the task.\" if you're confident in your plan.",
"mid_execution_reasoning": "You are currently executing a task and need to reassess your plan based on progress so far.\n\nTASK DESCRIPTION:\n{description}\n\nEXPECTED OUTPUT:\n{expected_output}\n\nCURRENT PROGRESS:\nSteps completed: {current_steps}\nTools used: {tools_used}\nProgress summary: {current_progress}\n\nRECENT CONVERSATION:\n{recent_messages}\n\nYour reassessment MUST focus on steering the remaining work toward a FINAL ANSWER that is as complete as possible and perfectly matches the **Expected output**.\n\nBased on the current progress and context, please reassess your plan for completing this task.\nConsider what has been accomplished, what challenges you've encountered, and what steps remain.\nAdjust your strategy if needed or confirm your current approach is still optimal.\n\nIMPORTANT: Structure your updated plan as follows:\n\nREMAINING STEPS:\n1. [First remaining action step]\n2. [Second remaining action step]\n3. [Continue with numbered steps...]\n\nUPDATED ACCEPTANCE CRITERIA (if changed):\n- [First criterion that must be met]\n- [Second criterion that must be met]\n- [Continue with criteria...]\n\nProvide a detailed updated plan for completing the task.\nEnd with \"READY: I am ready to continue executing the task.\" if you're confident in your plan.",
"mid_execution_plan": "You are {role}, a professional with the following background: {backstory}\n\nYour primary goal is: {goal}\n\nAs {role}, you are reassessing your plan during task execution based on the progress made so far.",
"mid_execution_reasoning_update": "I've reassessed my approach based on progress so far. Updated plan:\n\n{plan}",
"adaptive_reasoning_context": "\n\nTASK DESCRIPTION:\n{description}\n\nEXPECTED OUTPUT:\n{expected_output}\n\nCURRENT EXECUTION CONTEXT:\n- Steps completed: {current_steps}\n- Tools used: {tools_used}\n- Progress summary: {current_progress}\n\nConsider whether the current approach is optimal or if a strategic pause to reassess would be beneficial. You should reason when:\n- You might be approaching the task inefficiently\n- The context suggests a different strategy might be better\n- You're uncertain about the next steps\n- The progress suggests you need to reconsider your approach\n\nDecide whether reasoning/re-planning is needed at this point."
"adaptive_reasoning_context": "\n\nTASK DESCRIPTION:\n{description}\n\nEXPECTED OUTPUT:\n{expected_output}\n\nCURRENT EXECUTION CONTEXT:\n- Steps completed: {current_steps}\n- Tools used: {tools_used}\n- Progress summary: {current_progress}\n\nConsider whether the current approach is optimal or if a strategic pause to reassess would be beneficial. You should reason when:\n- You might be approaching the task inefficiently\n- The context suggests a different strategy might be better\n- You're uncertain about the next steps\n- The progress suggests you need to reconsider your approach\n- Tool usage patterns indicate issues (e.g., repeated failures, same tool used many times, rapid switching)\n- Multiple tools have returned errors or empty results\n- You're using the same tool repeatedly without making progress\n\nPay special attention to the TOOL USAGE STATISTICS section if present, as it reveals patterns that might not be obvious from the tool list alone.\n\nDecide whether reasoning/re-planning is needed at this point."
}
}
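
For reference, a response following the new STEPS / ACCEPTANCE CRITERIA layout these prompts request would look something like this (an invented example, not actual model output):

STEPS:
1. Gather the three most recent quarterly reports
2. Extract the revenue figures into a comparison table
3. Draft the summary and check it against the expected output

ACCEPTANCE CRITERIA:
- The table covers all three quarters
- The summary states the overall revenue trend

READY: I am ready to execute the task.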

View File

@@ -1,6 +1,6 @@
import logging
import json
from typing import Tuple, cast
from typing import Tuple, cast, List, Optional, Dict, Any
from pydantic import BaseModel, Field
@@ -16,10 +16,17 @@ from crewai.utilities.events.reasoning_events import (
)
class StructuredPlan(BaseModel):
"""Structured representation of a task plan."""
steps: List[str] = Field(description="List of steps to complete the task")
acceptance_criteria: List[str] = Field(description="Criteria that must be met before task is considered complete")
class ReasoningPlan(BaseModel):
"""Model representing a reasoning plan for a task."""
plan: str = Field(description="The detailed reasoning plan for the task.")
ready: bool = Field(description="Whether the agent is ready to execute the task.")
structured_plan: Optional[StructuredPlan] = Field(default=None, description="Structured version of the plan")
class AgentReasoningOutput(BaseModel):
@@ -31,6 +38,8 @@ class ReasoningFunction(BaseModel):
"""Model for function calling with reasoning."""
plan: str = Field(description="The detailed reasoning plan for the task.")
ready: bool = Field(description="Whether the agent is ready to execute the task.")
steps: Optional[List[str]] = Field(default=None, description="List of steps to complete the task")
acceptance_criteria: Optional[List[str]] = Field(default=None, description="Criteria that must be met before task is complete")
class AgentReasoning:
@@ -119,25 +128,25 @@ class AgentReasoning:
Returns:
AgentReasoningOutput: The output of the agent reasoning process.
"""
plan, ready = self.__create_initial_plan()
plan, ready, structured_plan = self.__create_initial_plan()
plan, ready = self.__refine_plan_if_needed(plan, ready)
plan, ready, structured_plan = self.__refine_plan_if_needed(plan, ready, structured_plan)
reasoning_plan = ReasoningPlan(plan=plan, ready=ready)
reasoning_plan = ReasoningPlan(plan=plan, ready=ready, structured_plan=structured_plan)
return AgentReasoningOutput(plan=reasoning_plan)
def __create_initial_plan(self) -> Tuple[str, bool]:
def __create_initial_plan(self) -> Tuple[str, bool, Optional[StructuredPlan]]:
"""
Creates the initial reasoning plan for the task.
Returns:
Tuple[str, bool]: The initial plan and whether the agent is ready to execute the task.
Tuple[str, bool, Optional[StructuredPlan]]: The initial plan, whether the agent is ready, and structured plan.
"""
reasoning_prompt = self.__create_reasoning_prompt()
if self.llm.supports_function_calling():
plan, ready = self.__call_with_function(reasoning_prompt, "initial_plan")
return plan, ready
plan, ready, structured_plan = self.__call_with_function(reasoning_prompt, "initial_plan")
return plan, ready, structured_plan
else:
system_prompt = self.i18n.retrieve("reasoning", "initial_plan").format(
role=self.agent.role,
@@ -152,18 +161,21 @@ class AgentReasoning:
]
)
return self.__parse_reasoning_response(str(response))
plan, ready = self.__parse_reasoning_response(str(response))
structured_plan = self.__extract_structured_plan(plan)
return plan, ready, structured_plan
def __refine_plan_if_needed(self, plan: str, ready: bool) -> Tuple[str, bool]:
def __refine_plan_if_needed(self, plan: str, ready: bool, structured_plan: Optional[StructuredPlan]) -> Tuple[str, bool, Optional[StructuredPlan]]:
"""
Refines the reasoning plan if the agent is not ready to execute the task.
Args:
plan: The current reasoning plan.
ready: Whether the agent is ready to execute the task.
structured_plan: The current structured plan.
Returns:
Tuple[str, bool]: The refined plan and whether the agent is ready to execute the task.
Tuple[str, bool, Optional[StructuredPlan]]: The refined plan, ready status, and structured plan.
"""
attempt = 1
max_attempts = self.agent.max_reasoning_attempts
@@ -185,7 +197,7 @@ class AgentReasoning:
refine_prompt = self.__create_refine_prompt(plan)
if self.llm.supports_function_calling():
plan, ready = self.__call_with_function(refine_prompt, "refine_plan")
plan, ready, structured_plan = self.__call_with_function(refine_prompt, "refine_plan")
else:
system_prompt = self.i18n.retrieve("reasoning", "refine_plan").format(
role=self.agent.role,
@@ -200,6 +212,7 @@ class AgentReasoning:
]
)
plan, ready = self.__parse_reasoning_response(str(response))
structured_plan = self.__extract_structured_plan(plan)
attempt += 1
@@ -209,9 +222,9 @@ class AgentReasoning:
)
break
return plan, ready
return plan, ready, structured_plan
def __call_with_function(self, prompt: str, prompt_type: str) -> Tuple[str, bool]:
def __call_with_function(self, prompt: str, prompt_type: str) -> Tuple[str, bool, Optional[StructuredPlan]]:
"""
Calls the LLM with function calling to get a reasoning plan.
@@ -220,7 +233,7 @@ class AgentReasoning:
prompt_type: The type of prompt (initial_plan or refine_plan).
Returns:
Tuple[str, bool]: A tuple containing the plan and whether the agent is ready.
Tuple[str, bool, Optional[StructuredPlan]]: A tuple containing the plan, ready status, and structured plan.
"""
self.logger.debug(f"Using function calling for {prompt_type} reasoning")
@@ -239,6 +252,16 @@ class AgentReasoning:
"ready": {
"type": "boolean",
"description": "Whether the agent is ready to execute the task."
},
"steps": {
"type": "array",
"items": {"type": "string"},
"description": "List of steps to complete the task"
},
"acceptance_criteria": {
"type": "array",
"items": {"type": "string"},
"description": "Criteria that must be met before task is considered complete"
}
},
"required": ["plan", "ready"]
@@ -254,9 +277,14 @@ class AgentReasoning:
)
# Prepare a simple callable that just returns the tool arguments as JSON
def _create_reasoning_plan(plan: str, ready: bool): # noqa: N802
def _create_reasoning_plan(plan: str, ready: bool, steps: Optional[List[str]] = None, acceptance_criteria: Optional[List[str]] = None): # noqa: N802
"""Return the reasoning plan result in JSON string form."""
return json.dumps({"plan": plan, "ready": ready})
return json.dumps({
"plan": plan,
"ready": ready,
"steps": steps,
"acceptance_criteria": acceptance_criteria
})
response = self.llm.call(
[
@@ -272,12 +300,19 @@ class AgentReasoning:
try:
result = json.loads(response)
if "plan" in result and "ready" in result:
return result["plan"], result["ready"]
structured_plan = None
if result.get("steps") or result.get("acceptance_criteria"):
structured_plan = StructuredPlan(
steps=result.get("steps", []),
acceptance_criteria=result.get("acceptance_criteria", [])
)
return result["plan"], result["ready"], structured_plan
except (json.JSONDecodeError, KeyError):
pass
response_str = str(response)
return response_str, "READY: I am ready to execute the task." in response_str
structured_plan = self.__extract_structured_plan(response_str)
return response_str, "READY: I am ready to execute the task." in response_str, structured_plan
except Exception as e:
self.logger.warning(f"Error during function calling: {str(e)}. Falling back to text parsing.")
@@ -297,10 +332,11 @@ class AgentReasoning:
)
fallback_str = str(fallback_response)
return fallback_str, "READY: I am ready to execute the task." in fallback_str
structured_plan = self.__extract_structured_plan(fallback_str)
return fallback_str, "READY: I am ready to execute the task." in fallback_str, structured_plan
except Exception as inner_e:
self.logger.error(f"Error during fallback text parsing: {str(inner_e)}")
return "Failed to generate a plan due to an error.", True # Default to ready to avoid getting stuck
return "Failed to generate a plan due to an error.", True, None # Default to ready to avoid getting stuck
def __get_agent_backstory(self) -> str:
"""
@@ -496,7 +532,7 @@ class AgentReasoning:
)
if self.llm.supports_function_calling():
plan, ready = self.__call_with_function(mid_execution_prompt, "mid_execution_plan")
plan, ready, structured_plan = self.__call_with_function(mid_execution_prompt, "mid_execution_plan")
else:
# Use the same prompt for system context
system_prompt = self.i18n.retrieve("reasoning", "mid_execution_plan").format(
@@ -513,8 +549,9 @@ class AgentReasoning:
)
plan, ready = self.__parse_reasoning_response(str(response))
structured_plan = self.__extract_structured_plan(plan)
reasoning_plan = ReasoningPlan(plan=plan, ready=ready)
reasoning_plan = ReasoningPlan(plan=plan, ready=ready, structured_plan=structured_plan)
return AgentReasoningOutput(plan=reasoning_plan)
def __create_mid_execution_prompt(
@@ -560,7 +597,8 @@ class AgentReasoning:
self,
current_steps: int,
tools_used: list,
current_progress: str
current_progress: str,
tool_usage_stats: Optional[Dict[str, Any]] = None
) -> bool:
"""
Use LLM function calling to determine if adaptive reasoning should be triggered.
@@ -569,13 +607,14 @@ class AgentReasoning:
current_steps: Number of steps executed so far
tools_used: List of tools that have been used
current_progress: Summary of progress made so far
tool_usage_stats: Optional statistics about tool usage patterns
Returns:
bool: True if reasoning should be triggered, False otherwise.
"""
try:
decision_prompt = self.__create_adaptive_reasoning_decision_prompt(
current_steps, tools_used, current_progress
current_steps, tools_used, current_progress, tool_usage_stats
)
if self.llm.supports_function_calling():
@@ -618,6 +657,11 @@ class AgentReasoning:
"reasoning": {
"type": "string",
"description": "Brief explanation of why reasoning is or isn't needed."
},
"detected_issues": {
"type": "array",
"items": {"type": "string"},
"description": "List of specific issues detected (e.g., 'repeated tool failures', 'no progress', 'inefficient approach')"
}
},
"required": ["should_reason", "reasoning"]
@@ -625,9 +669,18 @@ class AgentReasoning:
}
}
def _decide_reasoning_need(should_reason: bool, reasoning: str):
def _decide_reasoning_need(should_reason: bool, reasoning: str, detected_issues: Optional[List[str]] = None):
"""Return the reasoning decision result in JSON string form."""
return json.dumps({"should_reason": should_reason, "reasoning": reasoning})
result = {
"should_reason": should_reason,
"reasoning": reasoning
}
if detected_issues:
result["detected_issues"] = detected_issues
# Append detected issues to reasoning explanation
issues_str = ", ".join(detected_issues)
result["reasoning"] = f"{reasoning} Detected issues: {issues_str}"
return json.dumps(result)
system_prompt = self.i18n.retrieve("reasoning", "adaptive_reasoning_decision").format(
role=self.agent.role,
@@ -646,7 +699,11 @@ class AgentReasoning:
try:
result = json.loads(response)
return result.get("should_reason", False), result.get("reasoning", "No explanation provided")
reasoning_text = result.get("reasoning", "No explanation provided")
if result.get("detected_issues"):
# Include detected issues in the reasoning text for logging
self.logger.debug(f"Adaptive reasoning detected issues: {result['detected_issues']}")
return result.get("should_reason", False), reasoning_text
except (json.JSONDecodeError, KeyError):
return False, "No explanation provided"
@@ -669,11 +726,27 @@ class AgentReasoning:
self,
current_steps: int,
tools_used: list,
current_progress: str
current_progress: str,
tool_usage_stats: Optional[Dict[str, Any]] = None
) -> str:
"""Create prompt for adaptive reasoning decision."""
tools_used_str = ", ".join(tools_used) if tools_used else "No tools used yet"
# Add tool usage statistics to the prompt
tool_stats_str = ""
if tool_usage_stats:
tool_stats_str = f"\n\nTOOL USAGE STATISTICS:\n"
tool_stats_str += f"- Total tool invocations: {tool_usage_stats.get('total_tool_uses', 0)}\n"
tool_stats_str += f"- Unique tools used: {tool_usage_stats.get('unique_tools', 0)}\n"
if tool_usage_stats.get('tools_by_frequency'):
tool_stats_str += "- Tool frequency:\n"
for tool, count in tool_usage_stats['tools_by_frequency'].items():
tool_stats_str += f"{tool}: {count} times\n"
if tool_usage_stats.get('recent_patterns'):
tool_stats_str += f"- Recent patterns: {tool_usage_stats['recent_patterns']}\n"
# Use the prompt from i18n and format it with the current context
base_prompt = self.i18n.retrieve("reasoning", "adaptive_reasoning_decision").format(
role=self.agent.role,
@@ -686,9 +759,72 @@ class AgentReasoning:
expected_output=self.task.expected_output,
current_steps=current_steps,
tools_used=tools_used_str,
current_progress=current_progress
current_progress=current_progress + tool_stats_str
)
prompt = base_prompt + context_prompt
return prompt
def __extract_structured_plan(self, plan: str) -> Optional[StructuredPlan]:
"""
Extracts a structured plan from the given plan text.
Args:
plan: The plan text.
Returns:
Optional[StructuredPlan]: The extracted structured plan or None if no plan was found.
"""
if not plan:
return None
import re
steps = []
acceptance_criteria = []
# Look for numbered steps (1., 2., etc.)
step_pattern = r'^\s*(?:\d+\.|\-|\*)\s*(.+)$'
# Look for acceptance criteria section
in_acceptance_section = False
lines = plan.split('\n')
for line in lines:
line = line.strip()
# Check if we're entering acceptance criteria section
if any(marker in line.lower() for marker in ['acceptance criteria', 'success criteria', 'completion criteria']):
in_acceptance_section = True
continue
# Skip empty lines
if not line:
continue
# Extract steps or criteria
match = re.match(step_pattern, line, re.MULTILINE)
if match:
content = match.group(1).strip()
if in_acceptance_section:
acceptance_criteria.append(content)
else:
steps.append(content)
elif line and not line.endswith(':'): # Non-empty line that's not a header
if in_acceptance_section:
acceptance_criteria.append(line)
else:
# Check if it looks like a step (starts with action verb)
action_verbs = ['create', 'implement', 'design', 'build', 'test', 'verify', 'check', 'ensure', 'analyze', 'review']
if any(line.lower().startswith(verb) for verb in action_verbs):
steps.append(line)
# If we found steps or criteria, return structured plan
if steps or acceptance_criteria:
return StructuredPlan(
steps=steps,
acceptance_criteria=acceptance_criteria
)
return None
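
A quick standalone check of the extraction heuristic, reusing the same step regex and section marker (illustrative; the real method is private to AgentReasoning):

import re

plan = """STEPS:
1. Collect the raw data
2. Clean and deduplicate it

ACCEPTANCE CRITERIA:
- Output file contains no duplicate rows
"""

step_pattern = r'^\s*(?:\d+\.|\-|\*)\s*(.+)$'
steps, criteria, in_criteria = [], [], False
for line in plan.split("\n"):
    line = line.strip()
    if "acceptance criteria" in line.lower():
        in_criteria = True
        continue
    match = re.match(step_pattern, line)
    if match:
        (criteria if in_criteria else steps).append(match.group(1).strip())

print(steps)     # ['Collect the raw data', 'Clean and deduplicate it']
print(criteria)  # ['Output file contains no duplicate rows']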