chore: refactor parser & constants, improve tools_handler, update tests

- Move parser constants to a dedicated module with pre-compiled regexes
- Refactor CrewAgentParser into module-level functions; remove unused params
- Improve tools_handler by declaring its state as typed instance attributes
- Update tests to use the module-level parser functions
2026-01-19 21:08:13 +00:00 · 2025-08-29 14:35:08 -04:00
parent ec1eff02a8
commit e4c4b81e63
6 changed files with 249 additions and 188 deletions
--- a/src/crewai/agents/__init__.py
+++ b/src/crewai/agents/__init__.py
@@ -1,5 +1,5 @@
-from .cache.cache_handler import CacheHandler
-from .parser import CrewAgentParser
-from .tools_handler import ToolsHandler
+from crewai.agents.cache.cache_handler import CacheHandler
+from crewai.agents.parser import parse, AgentAction, AgentFinish, OutputParserException
+from crewai.agents.tools_handler import ToolsHandler

-__all__ = ["CacheHandler", "CrewAgentParser", "ToolsHandler"]
+__all__ = ["CacheHandler", "parse", "AgentAction", "AgentFinish", "OutputParserException", "ToolsHandler"]
--- a/src/crewai/agents/constants.py
+++ b/src/crewai/agents/constants.py
@@ -0,0 +1,27 @@
+"""Constants for agent-related modules."""
+
+import re
+from typing import Final
+
+# crewai.agents.parser constants
+
+FINAL_ANSWER_ACTION: Final[str] = "Final Answer:"
+MISSING_ACTION_AFTER_THOUGHT_ERROR_MESSAGE: Final[str] = (
+    "I did it wrong. Invalid Format: I missed the 'Action:' after 'Thought:'. I will do right next, and don't use a tool I have already used.\n"
+)
+MISSING_ACTION_INPUT_AFTER_ACTION_ERROR_MESSAGE: Final[str] = (
+    "I did it wrong. Invalid Format: I missed the 'Action Input:' after 'Action:'. I will do right next, and don't use a tool I have already used.\n"
+)
+FINAL_ANSWER_AND_PARSABLE_ACTION_ERROR_MESSAGE: Final[str] = (
+    "I did it wrong. Tried to both perform Action and give a Final Answer at the same time, I must do one or the other"
+)
+UNABLE_TO_REPAIR_JSON_RESULTS: Final[list[str]] = ['""', "{}"]
+ACTION_INPUT_REGEX: Final[re.Pattern[str]] = re.compile(
+    r"Action\s*\d*\s*:\s*(.*?)\s*Action\s*\d*\s*Input\s*\d*\s*:\s*(.*)", re.DOTALL
+)
+ACTION_REGEX: Final[re.Pattern[str]] = re.compile(
+    r"Action\s*\d*\s*:\s*(.*?)", re.DOTALL
+)
+ACTION_INPUT_ONLY_REGEX: Final[re.Pattern[str]] = re.compile(
+    r"\s*Action\s*\d*\s*Input\s*\d*\s*:\s*(.*)", re.DOTALL
+)
--- a/src/crewai/agents/parser.py
+++ b/src/crewai/agents/parser.py
@@ -1,50 +1,67 @@
-import re
-from typing import Any, Optional, Union
+"""Agent output parsing module for ReAct-style LLM responses.
+
+This module provides parsing functionality for agent outputs that follow
+the ReAct (Reasoning and Acting) format, converting them into structured
+AgentAction or AgentFinish objects.
+"""
+
+from dataclasses import dataclass

 from json_repair import repair_json

+from crewai.agents.constants import (
+    ACTION_INPUT_REGEX,
+    ACTION_REGEX,
+    ACTION_INPUT_ONLY_REGEX,
+    FINAL_ANSWER_ACTION,
+    MISSING_ACTION_AFTER_THOUGHT_ERROR_MESSAGE,
+    MISSING_ACTION_INPUT_AFTER_ACTION_ERROR_MESSAGE,
+    UNABLE_TO_REPAIR_JSON_RESULTS,
+)
 from crewai.utilities import I18N

-FINAL_ANSWER_ACTION = "Final Answer:"
-MISSING_ACTION_AFTER_THOUGHT_ERROR_MESSAGE = "I did it wrong. Invalid Format: I missed the 'Action:' after 'Thought:'. I will do right next, and don't use a tool I have already used.\n"
-MISSING_ACTION_INPUT_AFTER_ACTION_ERROR_MESSAGE = "I did it wrong. Invalid Format: I missed the 'Action Input:' after 'Action:'. I will do right next, and don't use a tool I have already used.\n"
-FINAL_ANSWER_AND_PARSABLE_ACTION_ERROR_MESSAGE = "I did it wrong. Tried to both perform Action and give a Final Answer at the same time, I must do one or the other"
+_I18N = I18N()


+@dataclass
 class AgentAction:
+    """Represents an action to be taken by an agent."""
+
    thought: str
    tool: str
    tool_input: str
    text: str
-    result: str
-
-    def __init__(self, thought: str, tool: str, tool_input: str, text: str):
-        self.thought = thought
-        self.tool = tool
-        self.tool_input = tool_input
-        self.text = text
+    result: str | None = None


+@dataclass
 class AgentFinish:
+    """Represents the final answer from an agent."""
+
    thought: str
    output: str
    text: str

-    def __init__(self, thought: str, output: str, text: str):
-        self.thought = thought
-        self.output = output
-        self.text = text
-

 class OutputParserException(Exception):
-    error: str
+    """Exception raised when output parsing fails.

-    def __init__(self, error: str):
+    Attributes:
+        error: The error message.
+    """
+
+    def __init__(self, error: str) -> None:
+        """Initialize OutputParserException.
+
+        Args:
+            error: The error message.
+        """
        self.error = error
+        super().__init__(error)


-class CrewAgentParser:
-    """Parses ReAct-style LLM calls that have a single tool input.
+def parse(text: str) -> AgentAction | AgentFinish:
+    """Parse agent output text into AgentAction or AgentFinish.

    Expects output to be in one of two formats.

@@ -62,108 +79,117 @@ class CrewAgentParser:

    Thought: agent thought here
    Final Answer: The temperature is 100 degrees
+
+    Args:
+        text: The agent output text to parse.
+
+    Returns:
+        AgentAction or AgentFinish based on the content.
+
+    Raises:
+        OutputParserException: If the text format is invalid.
    """
+    thought = _extract_thought(text)
+    includes_answer = FINAL_ANSWER_ACTION in text
+    action_match = ACTION_INPUT_REGEX.search(text)

-    _i18n: I18N = I18N()
-    agent: Any = None
+    if includes_answer:
+        final_answer = text.split(FINAL_ANSWER_ACTION)[-1].strip()
+        # Check whether the final answer ends with triple backticks.
+        if final_answer.endswith("```"):
+            # Count occurrences of triple backticks in the final answer.
+            count = final_answer.count("```")
+            # If count is odd then it's an unmatched trailing set; remove it.
+            if count % 2 != 0:
+                final_answer = final_answer[:-3].rstrip()
+        return AgentFinish(thought=thought, output=final_answer, text=text)

-    def __init__(self, agent: Optional[Any] = None):
-        self.agent = agent
+    elif action_match:
+        action = action_match.group(1)
+        clean_action = _clean_action(action)

-    @staticmethod
-    def parse_text(text: str) -> Union[AgentAction, AgentFinish]:
-        """
-        Static method to parse text into an AgentAction or AgentFinish without needing to instantiate the class.
+        action_input = action_match.group(2).strip()

-        Args:
-            text: The text to parse.
+        tool_input = action_input.strip(" ").strip('"')
+        safe_tool_input = _safe_repair_json(tool_input)

-        Returns:
-            Either an AgentAction or AgentFinish based on the parsed content.
-        """
-        parser = CrewAgentParser()
-        return parser.parse(text)
-
-    def parse(self, text: str) -> Union[AgentAction, AgentFinish]:
-        thought = self._extract_thought(text)
-        includes_answer = FINAL_ANSWER_ACTION in text
-        regex = (
-            r"Action\s*\d*\s*:[\s]*(.*?)[\s]*Action\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)"
+        return AgentAction(
+            thought=thought, tool=clean_action, tool_input=safe_tool_input, text=text
        )
-        action_match = re.search(regex, text, re.DOTALL)
-        if includes_answer:
-            final_answer = text.split(FINAL_ANSWER_ACTION)[-1].strip()
-            # Check whether the final answer ends with triple backticks.
-            if final_answer.endswith("```"):
-                # Count occurrences of triple backticks in the final answer.
-                count = final_answer.count("```")
-                # If count is odd then it's an unmatched trailing set; remove it.
-                if count % 2 != 0:
-                    final_answer = final_answer[:-3].rstrip()
-            return AgentFinish(thought, final_answer, text)

-        elif action_match:
-            action = action_match.group(1)
-            clean_action = self._clean_action(action)
+    if not ACTION_REGEX.search(text):
+        raise OutputParserException(
+            f"{MISSING_ACTION_AFTER_THOUGHT_ERROR_MESSAGE}\n{_I18N.slice('final_answer_format')}",
+        )
+    elif not ACTION_INPUT_ONLY_REGEX.search(text):
+        raise OutputParserException(
+            MISSING_ACTION_INPUT_AFTER_ACTION_ERROR_MESSAGE,
+        )
+    else:
+        err_format = _I18N.slice("format_without_tools")
+        error = f"{err_format}"
+        raise OutputParserException(
+            error,
+        )

-            action_input = action_match.group(2).strip()

-            tool_input = action_input.strip(" ").strip('"')
-            safe_tool_input = self._safe_repair_json(tool_input)
+def _extract_thought(text: str) -> str:
+    """Extract the thought portion from the text.

-            return AgentAction(thought, clean_action, safe_tool_input, text)
+    Args:
+        text: The full agent output text.

-        if not re.search(r"Action\s*\d*\s*:[\s]*(.*?)", text, re.DOTALL):
-            raise OutputParserException(
-                f"{MISSING_ACTION_AFTER_THOUGHT_ERROR_MESSAGE}\n{self._i18n.slice('final_answer_format')}",
-            )
-        elif not re.search(
-            r"[\s]*Action\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)", text, re.DOTALL
-        ):
-            raise OutputParserException(
-                MISSING_ACTION_INPUT_AFTER_ACTION_ERROR_MESSAGE,
-            )
-        else:
-            format = self._i18n.slice("format_without_tools")
-            error = f"{format}"
-            raise OutputParserException(
-                error,
-            )
+    Returns:
+        The extracted thought string.
+    """
+    thought_index = text.find("\nAction")
+    if thought_index == -1:
+        thought_index = text.find("\nFinal Answer")
+    if thought_index == -1:
+        return ""
+    thought = text[:thought_index].strip()
+    # Remove any triple backticks from the thought string
+    thought = thought.replace("```", "").strip()
+    return thought

-    def _extract_thought(self, text: str) -> str:
-        thought_index = text.find("\nAction")
-        if thought_index == -1:
-            thought_index = text.find("\nFinal Answer")
-        if thought_index == -1:
-            return ""
-        thought = text[:thought_index].strip()
-        # Remove any triple backticks from the thought string
-        thought = thought.replace("```", "").strip()
-        return thought

-    def _clean_action(self, text: str) -> str:
-        """Clean action string by removing non-essential formatting characters."""
-        return text.strip().strip("*").strip()
+def _clean_action(text: str) -> str:
+    """Clean action string by removing non-essential formatting characters.

-    def _safe_repair_json(self, tool_input: str) -> str:
-        UNABLE_TO_REPAIR_JSON_RESULTS = ['""', "{}"]
+    Args:
+        text: The action text to clean.

-        # Skip repair if the input starts and ends with square brackets
-        # Explanation: The JSON parser has issues handling inputs that are enclosed in square brackets ('[]').
-        # These are typically valid JSON arrays or strings that do not require repair. Attempting to repair such inputs
-        # might lead to unintended alterations, such as wrapping the entire input in additional layers or modifying
-        # the structure in a way that changes its meaning. By skipping the repair for inputs that start and end with
-        # square brackets, we preserve the integrity of these valid JSON structures and avoid unnecessary modifications.
-        if tool_input.startswith("[") and tool_input.endswith("]"):
-            return tool_input
+    Returns:
+        The cleaned action string.
+    """
+    return text.strip().strip("*").strip()

-        # Before repair, handle common LLM issues:
-        # 1. Replace """ with " to avoid JSON parser errors

-        tool_input = tool_input.replace('"""', '"')
+def _safe_repair_json(tool_input: str) -> str:
+    """Safely repair JSON input.

-        result = repair_json(tool_input)
-        if result in UNABLE_TO_REPAIR_JSON_RESULTS:
-            return tool_input
+    Args:
+        tool_input: The tool input string to repair.

-        return str(result)
+    Returns:
+        The repaired JSON string or original if repair fails.
+    """
+    # Skip repair if the input starts and ends with square brackets
+    # Explanation: The JSON parser has issues handling inputs that are enclosed in square brackets ('[]').
+    # These are typically valid JSON arrays or strings that do not require repair. Attempting to repair such inputs
+    # might lead to unintended alterations, such as wrapping the entire input in additional layers or modifying
+    # the structure in a way that changes its meaning. By skipping the repair for inputs that start and end with
+    # square brackets, we preserve the integrity of these valid JSON structures and avoid unnecessary modifications.
+    if tool_input.startswith("[") and tool_input.endswith("]"):
+        return tool_input
+
+    # Before repair, handle common LLM issues:
+    # 1. Replace """ with " to avoid JSON parser errors
+
+    tool_input = tool_input.replace('"""', '"')
+
+    result = repair_json(tool_input)
+    if result in UNABLE_TO_REPAIR_JSON_RESULTS:
+        return tool_input
+
+    return str(result)
--- a/src/crewai/agents/tools_handler.py
+++ b/src/crewai/agents/tools_handler.py
@@ -1,29 +1,41 @@
-from typing import Any, Optional, Union
+"""Tools handler for managing tool execution and caching."""

-from ..tools.cache_tools.cache_tools import CacheTools
-from ..tools.tool_calling import InstructorToolCalling, ToolCalling
-from .cache.cache_handler import CacheHandler
+from crewai.tools.cache_tools.cache_tools import CacheTools
+from crewai.tools.tool_calling import InstructorToolCalling, ToolCalling
+from crewai.agents.cache.cache_handler import CacheHandler


 class ToolsHandler:
-    """Callback handler for tool usage."""
+    """Callback handler for tool usage.

-    last_used_tool: Optional[ToolCalling] = None
-    cache: Optional[CacheHandler]
+    Attributes:
+        last_used_tool: The most recently used tool calling instance.
+        cache: Optional cache handler for storing tool outputs.
+    """

-    def __init__(self, cache: Optional[CacheHandler] = None):
-        """Initialize the callback handler."""
-        self.cache = cache
-        self.last_used_tool = None
+    def __init__(self, cache: CacheHandler | None = None) -> None:
+        """Initialize the callback handler.
+
+        Args:
+            cache: Optional cache handler for storing tool outputs.
+        """
+        self.cache: CacheHandler | None = cache
+        self.last_used_tool: ToolCalling | InstructorToolCalling | None = None

    def on_tool_use(
        self,
-        calling: Union[ToolCalling, InstructorToolCalling],
+        calling: ToolCalling | InstructorToolCalling,
        output: str,
        should_cache: bool = True,
-    ) -> Any:
-        """Run when tool ends running."""
-        self.last_used_tool = calling  # type: ignore # BUG?: Incompatible types in assignment (expression has type "Union[ToolCalling, InstructorToolCalling]", variable has type "ToolCalling")
+    ) -> None:
+        """Run when tool ends running.
+
+        Args:
+            calling: The tool calling instance.
+            output: The output from the tool execution.
+            should_cache: Whether to cache the tool output.
+        """
+        self.last_used_tool = calling
        if self.cache and should_cache and calling.tool_name != CacheTools().name:
            self.cache.add(
                tool=calling.tool_name,
--- a/src/crewai/utilities/agent_utils.py
+++ b/src/crewai/utilities/agent_utils.py
@@ -2,12 +2,12 @@ import json
 import re
 from typing import Any, Callable, Dict, List, Optional, Sequence, Union

+from crewai.agents.constants import FINAL_ANSWER_AND_PARSABLE_ACTION_ERROR_MESSAGE
 from crewai.agents.parser import (
-    FINAL_ANSWER_AND_PARSABLE_ACTION_ERROR_MESSAGE,
    AgentAction,
    AgentFinish,
-    CrewAgentParser,
    OutputParserException,
+    parse,
 )
 from crewai.llm import LLM
 from crewai.llms.base_llm import BaseLLM
@@ -25,6 +25,7 @@ from crewai.cli.config import Settings

 console = Console()

+
 def parse_tools(tools: List[BaseTool]) -> List[CrewStructuredTool]:
    """Parse tools to be used for the task."""
    tools_list = []
@@ -122,7 +123,7 @@ def format_message_for_llm(prompt: str, role: str = "user") -> Dict[str, str]:
 def format_answer(answer: str) -> Union[AgentAction, AgentFinish]:
    """Format a response from the LLM into an AgentAction or AgentFinish."""
    try:
-        return CrewAgentParser.parse_text(answer)
+        return parse(answer)
    except Exception:
        # If parsing fails, return a default AgentFinish
        return AgentFinish(
@@ -446,9 +447,16 @@ def show_agent_logs(
 def _print_current_organization():
    settings = Settings()
    if settings.org_uuid:
-        console.print(f"Fetching agent from organization: {settings.org_name} ({settings.org_uuid})", style="bold blue")
+        console.print(
+            f"Fetching agent from organization: {settings.org_name} ({settings.org_uuid})",
+            style="bold blue",
+        )
    else:
-        console.print("No organization currently set. We recommend setting one before using: `crewai org switch <org_id>` command.", style="yellow")
+        console.print(
+            "No organization currently set. We recommend setting one before using: `crewai org switch <org_id>` command.",
+            style="yellow",
+        )
+

 def load_agent_from_repository(from_repository: str) -> Dict[str, Any]:
    attributes: Dict[str, Any] = {}