chore: refactor parser & constants, improve tools_handler, update tests

- Move parser constants to dedicated module with pre-compiled regex - Refactor CrewAgentParser to module functions; remove unused params - Improve tools_handler with instance attributes - Update tests to use module-level parser functions
2026-05-02 07:42:40 +00:00 · 2025-08-29 14:35:08 -04:00
parent ec1eff02a8
commit e4c4b81e63
6 changed files with 249 additions and 188 deletions
--- a/src/crewai/agents/parser.py
+++ b/src/crewai/agents/parser.py
@@ -1,50 +1,67 @@
-import re
-from typing import Any, Optional, Union
+"""Agent output parsing module for ReAct-style LLM responses.
+
+This module provides parsing functionality for agent outputs that follow
+the ReAct (Reasoning and Acting) format, converting them into structured
+AgentAction or AgentFinish objects.
+"""
+
+from dataclasses import dataclass

 from json_repair import repair_json

+from crewai.agents.constants import (
+    ACTION_INPUT_REGEX,
+    ACTION_REGEX,
+    ACTION_INPUT_ONLY_REGEX,
+    FINAL_ANSWER_ACTION,
+    MISSING_ACTION_AFTER_THOUGHT_ERROR_MESSAGE,
+    MISSING_ACTION_INPUT_AFTER_ACTION_ERROR_MESSAGE,
+    UNABLE_TO_REPAIR_JSON_RESULTS,
+)
 from crewai.utilities import I18N

-FINAL_ANSWER_ACTION = "Final Answer:"
-MISSING_ACTION_AFTER_THOUGHT_ERROR_MESSAGE = "I did it wrong. Invalid Format: I missed the 'Action:' after 'Thought:'. I will do right next, and don't use a tool I have already used.\n"
-MISSING_ACTION_INPUT_AFTER_ACTION_ERROR_MESSAGE = "I did it wrong. Invalid Format: I missed the 'Action Input:' after 'Action:'. I will do right next, and don't use a tool I have already used.\n"
-FINAL_ANSWER_AND_PARSABLE_ACTION_ERROR_MESSAGE = "I did it wrong. Tried to both perform Action and give a Final Answer at the same time, I must do one or the other"
+_I18N = I18N()


+@dataclass
 class AgentAction:
+    """Represents an action to be taken by an agent."""
+
    thought: str
    tool: str
    tool_input: str
    text: str
-    result: str
-
-    def __init__(self, thought: str, tool: str, tool_input: str, text: str):
-        self.thought = thought
-        self.tool = tool
-        self.tool_input = tool_input
-        self.text = text
+    result: str | None = None


+@dataclass
 class AgentFinish:
+    """Represents the final answer from an agent."""
+
    thought: str
    output: str
    text: str

-    def __init__(self, thought: str, output: str, text: str):
-        self.thought = thought
-        self.output = output
-        self.text = text
-

 class OutputParserException(Exception):
-    error: str
+    """Exception raised when output parsing fails.

-    def __init__(self, error: str):
+    Attributes:
+        error: The error message.
+    """
+
+    def __init__(self, error: str) -> None:
+        """Initialize OutputParserException.
+
+        Args:
+            error: The error message.
+        """
        self.error = error
+        super().__init__(error)


-class CrewAgentParser:
-    """Parses ReAct-style LLM calls that have a single tool input.
+def parse(text: str) -> AgentAction | AgentFinish:
+    """Parse agent output text into AgentAction or AgentFinish.

    Expects output to be in one of two formats.

@@ -62,108 +79,117 @@ class CrewAgentParser:

    Thought: agent thought here
    Final Answer: The temperature is 100 degrees
+
+    Args:
+        text: The agent output text to parse.
+
+    Returns:
+        AgentAction or AgentFinish based on the content.
+
+    Raises:
+        OutputParserException: If the text format is invalid.
    """
+    thought = _extract_thought(text)
+    includes_answer = FINAL_ANSWER_ACTION in text
+    action_match = ACTION_INPUT_REGEX.search(text)

-    _i18n: I18N = I18N()
-    agent: Any = None
+    if includes_answer:
+        final_answer = text.split(FINAL_ANSWER_ACTION)[-1].strip()
+        # Check whether the final answer ends with triple backticks.
+        if final_answer.endswith("```"):
+            # Count occurrences of triple backticks in the final answer.
+            count = final_answer.count("```")
+            # If count is odd then it's an unmatched trailing set; remove it.
+            if count % 2 != 0:
+                final_answer = final_answer[:-3].rstrip()
+        return AgentFinish(thought=thought, output=final_answer, text=text)

-    def __init__(self, agent: Optional[Any] = None):
-        self.agent = agent
+    elif action_match:
+        action = action_match.group(1)
+        clean_action = _clean_action(action)

-    @staticmethod
-    def parse_text(text: str) -> Union[AgentAction, AgentFinish]:
-        """
-        Static method to parse text into an AgentAction or AgentFinish without needing to instantiate the class.
+        action_input = action_match.group(2).strip()

-        Args:
-            text: The text to parse.
+        tool_input = action_input.strip(" ").strip('"')
+        safe_tool_input = _safe_repair_json(tool_input)

-        Returns:
-            Either an AgentAction or AgentFinish based on the parsed content.
-        """
-        parser = CrewAgentParser()
-        return parser.parse(text)
-
-    def parse(self, text: str) -> Union[AgentAction, AgentFinish]:
-        thought = self._extract_thought(text)
-        includes_answer = FINAL_ANSWER_ACTION in text
-        regex = (
-            r"Action\s*\d*\s*:[\s]*(.*?)[\s]*Action\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)"
+        return AgentAction(
+            thought=thought, tool=clean_action, tool_input=safe_tool_input, text=text
        )
-        action_match = re.search(regex, text, re.DOTALL)
-        if includes_answer:
-            final_answer = text.split(FINAL_ANSWER_ACTION)[-1].strip()
-            # Check whether the final answer ends with triple backticks.
-            if final_answer.endswith("```"):
-                # Count occurrences of triple backticks in the final answer.
-                count = final_answer.count("```")
-                # If count is odd then it's an unmatched trailing set; remove it.
-                if count % 2 != 0:
-                    final_answer = final_answer[:-3].rstrip()
-            return AgentFinish(thought, final_answer, text)

-        elif action_match:
-            action = action_match.group(1)
-            clean_action = self._clean_action(action)
+    if not ACTION_REGEX.search(text):
+        raise OutputParserException(
+            f"{MISSING_ACTION_AFTER_THOUGHT_ERROR_MESSAGE}\n{_I18N.slice('final_answer_format')}",
+        )
+    elif not ACTION_INPUT_ONLY_REGEX.search(text):
+        raise OutputParserException(
+            MISSING_ACTION_INPUT_AFTER_ACTION_ERROR_MESSAGE,
+        )
+    else:
+        err_format = _I18N.slice("format_without_tools")
+        error = f"{err_format}"
+        raise OutputParserException(
+            error,
+        )

-            action_input = action_match.group(2).strip()

-            tool_input = action_input.strip(" ").strip('"')
-            safe_tool_input = self._safe_repair_json(tool_input)
+def _extract_thought(text: str) -> str:
+    """Extract the thought portion from the text.

-            return AgentAction(thought, clean_action, safe_tool_input, text)
+    Args:
+        text: The full agent output text.

-        if not re.search(r"Action\s*\d*\s*:[\s]*(.*?)", text, re.DOTALL):
-            raise OutputParserException(
-                f"{MISSING_ACTION_AFTER_THOUGHT_ERROR_MESSAGE}\n{self._i18n.slice('final_answer_format')}",
-            )
-        elif not re.search(
-            r"[\s]*Action\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)", text, re.DOTALL
-        ):
-            raise OutputParserException(
-                MISSING_ACTION_INPUT_AFTER_ACTION_ERROR_MESSAGE,
-            )
-        else:
-            format = self._i18n.slice("format_without_tools")
-            error = f"{format}"
-            raise OutputParserException(
-                error,
-            )
+    Returns:
+        The extracted thought string.
+    """
+    thought_index = text.find("\nAction")
+    if thought_index == -1:
+        thought_index = text.find("\nFinal Answer")
+    if thought_index == -1:
+        return ""
+    thought = text[:thought_index].strip()
+    # Remove any triple backticks from the thought string
+    thought = thought.replace("```", "").strip()
+    return thought

-    def _extract_thought(self, text: str) -> str:
-        thought_index = text.find("\nAction")
-        if thought_index == -1:
-            thought_index = text.find("\nFinal Answer")
-        if thought_index == -1:
-            return ""
-        thought = text[:thought_index].strip()
-        # Remove any triple backticks from the thought string
-        thought = thought.replace("```", "").strip()
-        return thought

-    def _clean_action(self, text: str) -> str:
-        """Clean action string by removing non-essential formatting characters."""
-        return text.strip().strip("*").strip()
+def _clean_action(text: str) -> str:
+    """Clean action string by removing non-essential formatting characters.

-    def _safe_repair_json(self, tool_input: str) -> str:
-        UNABLE_TO_REPAIR_JSON_RESULTS = ['""', "{}"]
+    Args:
+        text: The action text to clean.

-        # Skip repair if the input starts and ends with square brackets
-        # Explanation: The JSON parser has issues handling inputs that are enclosed in square brackets ('[]').
-        # These are typically valid JSON arrays or strings that do not require repair. Attempting to repair such inputs
-        # might lead to unintended alterations, such as wrapping the entire input in additional layers or modifying
-        # the structure in a way that changes its meaning. By skipping the repair for inputs that start and end with
-        # square brackets, we preserve the integrity of these valid JSON structures and avoid unnecessary modifications.
-        if tool_input.startswith("[") and tool_input.endswith("]"):
-            return tool_input
+    Returns:
+        The cleaned action string.
+    """
+    return text.strip().strip("*").strip()

-        # Before repair, handle common LLM issues:
-        # 1. Replace """ with " to avoid JSON parser errors

-        tool_input = tool_input.replace('"""', '"')
+def _safe_repair_json(tool_input: str) -> str:
+    """Safely repair JSON input.

-        result = repair_json(tool_input)
-        if result in UNABLE_TO_REPAIR_JSON_RESULTS:
-            return tool_input
+    Args:
+        tool_input: The tool input string to repair.

-        return str(result)
+    Returns:
+        The repaired JSON string or original if repair fails.
+    """
+    # Skip repair if the input starts and ends with square brackets
+    # Explanation: The JSON parser has issues handling inputs that are enclosed in square brackets ('[]').
+    # These are typically valid JSON arrays or strings that do not require repair. Attempting to repair such inputs
+    # might lead to unintended alterations, such as wrapping the entire input in additional layers or modifying
+    # the structure in a way that changes its meaning. By skipping the repair for inputs that start and end with
+    # square brackets, we preserve the integrity of these valid JSON structures and avoid unnecessary modifications.
+    if tool_input.startswith("[") and tool_input.endswith("]"):
+        return tool_input
+
+    # Before repair, handle common LLM issues:
+    # 1. Replace """ with " to avoid JSON parser errors
+
+    tool_input = tool_input.replace('"""', '"')
+
+    result = repair_json(tool_input)
+    if result in UNABLE_TO_REPAIR_JSON_RESULTS:
+        return tool_input
+
+    return str(result)