fix: unify tool name sanitization across codebase

2026-01-23 07:08:14 +00:00 · 2026-01-22 19:01:14 -05:00
parent e9ca6e89d8
commit 846133310b
12 changed files with 121 additions and 47 deletions
--- a/lib/crewai/src/crewai/agents/agent_adapters/base_tool_adapter.py
+++ b/lib/crewai/src/crewai/agents/agent_adapters/base_tool_adapter.py
@@ -3,6 +3,8 @@ from __future__ import annotations
 from abc import ABC, abstractmethod
 from typing import TYPE_CHECKING, Any

+from crewai.utilities.string_utils import sanitize_tool_name as _sanitize_tool_name
+

 if TYPE_CHECKING:
    from crewai.tools.base_tool import BaseTool
@@ -35,4 +37,4 @@ class BaseToolAdapter(ABC):
    @staticmethod
    def sanitize_tool_name(tool_name: str) -> str:
        """Sanitize tool name for API compatibility."""
-        return tool_name.replace(" ", "_")
+        return _sanitize_tool_name(tool_name)
--- a/lib/crewai/src/crewai/agents/agent_adapters/openai_agents/openai_agent_tool_adapter.py
+++ b/lib/crewai/src/crewai/agents/agent_adapters/openai_agents/openai_agent_tool_adapter.py
@@ -7,7 +7,6 @@ to OpenAI Assistant-compatible format using the agents library.
 from collections.abc import Awaitable
 import inspect
 import json
-import re
 from typing import Any, cast

 from crewai.agents.agent_adapters.base_tool_adapter import BaseToolAdapter
@@ -17,6 +16,7 @@ from crewai.agents.agent_adapters.openai_agents.protocols import (
 )
 from crewai.tools import BaseTool
 from crewai.utilities.import_utils import require
+from crewai.utilities.string_utils import sanitize_tool_name


 agents_module = cast(
@@ -78,18 +78,6 @@ class OpenAIAgentToolAdapter(BaseToolAdapter):
        if not tools:
            return []

-        def sanitize_tool_name(name: str) -> str:
-            """Convert tool name to match OpenAI's required pattern.
-
-            Args:
-                name: Original tool name.
-
-            Returns:
-                Sanitized tool name matching OpenAI requirements.
-            """
-
-            return re.sub(r"[^a-zA-Z0-9_-]", "_", name).lower()
-
        def create_tool_wrapper(tool: BaseTool) -> Any:
            """Create a wrapper function that handles the OpenAI function tool interface.

--- a/lib/crewai/src/crewai/agents/crew_agent_executor.py
+++ b/lib/crewai/src/crewai/agents/crew_agent_executor.py
@@ -47,6 +47,7 @@ from crewai.utilities.agent_utils import (
 from crewai.utilities.constants import TRAINING_DATA_FILE
 from crewai.utilities.i18n import I18N, get_i18n
 from crewai.utilities.printer import Printer
+from crewai.utilities.string_utils import sanitize_tool_name
 from crewai.utilities.tool_utils import (
    aexecute_tool_and_check_finality,
    execute_tool_and_check_finality,
@@ -636,12 +637,10 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
        agent_key = getattr(self.agent, "key", "unknown") if self.agent else "unknown"

        # Find original tool by matching sanitized name (needed for cache_function and result_as_answer)
-        import re

        original_tool = None
        for tool in self.original_tools or []:
-            sanitized_name = re.sub(r"[^a-zA-Z0-9_.\-:]", "_", tool.name)
-            if sanitized_name == func_name:
+            if sanitize_tool_name(tool.name) == func_name:
                original_tool = tool
                break

@@ -753,6 +752,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
        tool_message: LLMMessage = {
            "role": "tool",
            "tool_call_id": call_id,
+            "name": func_name,
            "content": result,
        }
        self.messages.append(tool_message)
--- a/lib/crewai/src/crewai/experimental/agent_executor.py
+++ b/lib/crewai/src/crewai/experimental/agent_executor.py
@@ -602,12 +602,11 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
            )

            # Find original tool by matching sanitized name (needed for cache_function and result_as_answer)
-            import re
+            from crewai.utilities.string_utils import sanitize_tool_name

            original_tool = None
            for tool in self.original_tools or []:
-                sanitized_name = re.sub(r"[^a-zA-Z0-9_.\-:]", "_", tool.name)
-                if sanitized_name == func_name:
+                if sanitize_tool_name(tool.name) == func_name:
                    original_tool = tool
                    break

@@ -721,6 +720,7 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
            tool_message: LLMMessage = {
                "role": "tool",
                "tool_call_id": call_id,
+                "name": func_name,
                "content": result,
            }
            self.state.messages.append(tool_message)
--- a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py
@@ -444,9 +444,9 @@ class AnthropicCompletion(BaseLLM):
                else:
                    system_message = cast(str, content)
            elif role == "tool":
-                # Convert OpenAI-style tool message to Anthropic tool_result format
-                # These will be collected and added as a user message
                tool_call_id = message.get("tool_call_id", "")
+                if not tool_call_id:
+                    raise ValueError("Tool message missing required tool_call_id")
                tool_result = {
                    "type": "tool_result",
                    "tool_use_id": tool_call_id,
--- a/lib/crewai/src/crewai/llms/providers/azure/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/azure/completion.py
@@ -517,9 +517,10 @@ class AzureCompletion(BaseLLM):
            # Handle None content - Azure requires string content
            content = message.get("content") or ""

-            # Handle tool role messages - keep as tool role for Azure OpenAI
            if role == "tool":
-                tool_call_id = message.get("tool_call_id", "unknown")
+                tool_call_id = message.get("tool_call_id", "")
+                if not tool_call_id:
+                    raise ValueError("Tool message missing required tool_call_id")
                azure_messages.append(
                    {
                        "role": "tool",
--- a/lib/crewai/src/crewai/llms/providers/bedrock/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/bedrock/completion.py
@@ -1340,8 +1340,9 @@ class BedrockCompletion(BaseLLM):
                converse_messages.append(
                    {"role": "assistant", "content": bedrock_content}
                )
-            elif role == "tool" and tool_call_id:
-                # Convert OpenAI-style tool response to Bedrock toolResult format
+            elif role == "tool":
+                if not tool_call_id:
+                    raise ValueError("Tool message missing required tool_call_id")
                converse_messages.append(
                    {
                        "role": "user",
--- a/lib/crewai/src/crewai/llms/providers/gemini/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/gemini/completion.py
@@ -531,6 +531,53 @@ class GeminiCompletion(BaseLLM):
                    system_instruction += f"\n\n{text_content}"
                else:
                    system_instruction = text_content
+            elif role == "tool":
+                tool_call_id = message.get("tool_call_id")
+                if not tool_call_id:
+                    raise ValueError("Tool message missing required tool_call_id")
+
+                tool_name = message.get("name", "")
+
+                response_data: dict[str, Any]
+                try:
+                    response_data = json.loads(text_content) if text_content else {}
+                except (json.JSONDecodeError, TypeError):
+                    response_data = {"result": text_content}
+
+                function_response_part = types.Part.from_function_response(
+                    name=tool_name, response=response_data
+                )
+                contents.append(
+                    types.Content(role="user", parts=[function_response_part])
+                )
+            elif role == "assistant" and message.get("tool_calls"):
+                parts: list[types.Part] = []
+
+                if text_content:
+                    parts.append(types.Part.from_text(text=text_content))
+
+                tool_calls: list[dict[str, Any]] = message.get("tool_calls") or []
+                for tool_call in tool_calls:
+                    func: dict[str, Any] = tool_call.get("function") or {}
+                    func_name: str = str(func.get("name") or "")
+                    func_args_raw: str | dict[str, Any] = func.get("arguments") or {}
+
+                    func_args: dict[str, Any]
+                    if isinstance(func_args_raw, str):
+                        try:
+                            func_args = (
+                                json.loads(func_args_raw) if func_args_raw else {}
+                            )
+                        except (json.JSONDecodeError, TypeError):
+                            func_args = {}
+                    else:
+                        func_args = func_args_raw
+
+                    parts.append(
+                        types.Part.from_function_call(name=func_name, args=func_args)
+                    )
+
+                contents.append(types.Content(role="model", parts=parts))
            else:
                # Convert role for Gemini (assistant -> model)
                gemini_role = "model" if role == "assistant" else "user"
--- a/lib/crewai/src/crewai/llms/providers/utils/common.py
+++ b/lib/crewai/src/crewai/llms/providers/utils/common.py
@@ -2,16 +2,12 @@ import logging
 import re
 from typing import Any

+from crewai.utilities.string_utils import sanitize_tool_name
+

 def validate_function_name(name: str, provider: str = "LLM") -> str:
    """Validate function name according to common LLM provider requirements.

-    Most LLM providers (OpenAI, Gemini, Anthropic) have similar requirements:
-    - Must start with letter or underscore
-    - Only alphanumeric, underscore, dot, colon, dash allowed
-    - Maximum length of 64 characters
-    - Cannot be empty
-
    Args:
        name: The function name to validate
        provider: The provider name for error messages
@@ -35,11 +31,10 @@ def validate_function_name(name: str, provider: str = "LLM") -> str:
            f"{provider} function name '{name}' exceeds 64 character limit"
        )

-    # Check for invalid characters (most providers support these)
-    if not re.match(r"^[a-zA-Z_][a-zA-Z0-9_.\-:]*$", name):
+    if not re.match(r"^[a-z_][a-z0-9_]*$", name):
        raise ValueError(
            f"{provider} function name '{name}' contains invalid characters. "
-            f"Only letters, numbers, underscore, dot, colon, dash allowed"
+            f"Only lowercase letters, numbers, and underscores allowed"
        )

    return name
@@ -108,16 +103,13 @@ def log_tool_conversion(tool: dict[str, Any], provider: str) -> None:
 def sanitize_function_name(name: str) -> str:
    """Sanitize function name for LLM provider compatibility.

-    Replaces invalid characters with underscores. Valid characters are:
-    letters, numbers, underscore, dot, colon, and dash.
-
    Args:
        name: Original function name

    Returns:
-        Sanitized function name with invalid characters replaced
+        Sanitized function name (lowercase, a-z0-9_ only, max 64 chars)
    """
-    return re.sub(r"[^a-zA-Z0-9_.\-:]", "_", name)
+    return sanitize_tool_name(name)


 def safe_tool_conversion(
--- a/lib/crewai/src/crewai/utilities/agent_utils.py
+++ b/lib/crewai/src/crewai/utilities/agent_utils.py
@@ -28,6 +28,7 @@ from crewai.utilities.exceptions.context_window_exceeding_exception import (
 )
 from crewai.utilities.i18n import I18N
 from crewai.utilities.printer import ColoredText, Printer
+from crewai.utilities.string_utils import sanitize_tool_name
 from crewai.utilities.token_counter_callback import TokenCalcHandler
 from crewai.utilities.types import LLMMessage

@@ -96,15 +97,15 @@ def parse_tools(tools: list[BaseTool]) -> list[CrewStructuredTool]:


 def get_tool_names(tools: Sequence[CrewStructuredTool | BaseTool]) -> str:
-    """Get the names of the tools.
+    """Get the sanitized names of the tools.

    Args:
        tools: List of tools to get names from.

    Returns:
-        Comma-separated string of tool names.
+        Comma-separated string of sanitized tool names.
    """
-    return ", ".join([t.name for t in tools])
+    return ", ".join([sanitize_tool_name(t.name) for t in tools])


 def render_text_description_and_args(
@@ -168,10 +169,9 @@ def convert_tools_to_openai_schema(
        # BaseTool formats description as "Tool Name: ...\nTool Arguments: ...\nTool Description: {original}"
        description = tool.description
        if "Tool Description:" in description:
-            # Extract the original description after "Tool Description:"
            description = description.split("Tool Description:")[-1].strip()

-        sanitized_name = re.sub(r"[^a-zA-Z0-9_.\-:]", "_", tool.name)
+        sanitized_name = sanitize_tool_name(tool.name)

        schema: dict[str, Any] = {
            "type": "function",
@@ -182,7 +182,7 @@ def convert_tools_to_openai_schema(
            },
        }
        openai_tools.append(schema)
-        available_functions[sanitized_name] = tool.run  # type: ignore[attr-defined]
+        available_functions[sanitized_name] = tool.run  # type: ignore[union-attr]

    return openai_tools, available_functions

--- a/lib/crewai/src/crewai/utilities/string_utils.py
+++ b/lib/crewai/src/crewai/utilities/string_utils.py
@@ -1,8 +1,48 @@
+# sanitize_tool_name adapted from python-slugify by Val Neekman
+# https://github.com/un33k/python-slugify
+# MIT License
+
 import re
 from typing import Any, Final
+import unicodedata


 _VARIABLE_PATTERN: Final[re.Pattern[str]] = re.compile(r"\{([A-Za-z_][A-Za-z0-9_\-]*)}")
+_QUOTE_PATTERN: Final[re.Pattern[str]] = re.compile(r"[\'\"]+")
+_CAMEL_LOWER_UPPER: Final[re.Pattern[str]] = re.compile(r"([a-z])([A-Z])")
+_CAMEL_UPPER_LOWER: Final[re.Pattern[str]] = re.compile(r"([A-Z]+)([A-Z][a-z])")
+_DISALLOWED_CHARS_PATTERN: Final[re.Pattern[str]] = re.compile(r"[^a-zA-Z0-9]+")
+_DUPLICATE_UNDERSCORE_PATTERN: Final[re.Pattern[str]] = re.compile(r"_+")
+_MAX_TOOL_NAME_LENGTH: Final[int] = 64
+
+
+def sanitize_tool_name(name: str, max_length: int = _MAX_TOOL_NAME_LENGTH) -> str:
+    """Sanitize tool name for LLM provider compatibility.
+
+    Normalizes Unicode (dropping non-ASCII characters), splits camelCase, lowercases, removes
+    quotes, replaces other invalid characters with underscores, and truncates to max_length.
+
+    Args:
+        name: Original tool name.
+        max_length: Maximum allowed length (default 64 per OpenAI/Bedrock limits).
+
+    Returns:
+        Sanitized tool name (lowercase, a-z0-9_ only, at most max_length chars; may be empty).
+    """
+    name = unicodedata.normalize("NFKD", name)
+    name = name.encode("ascii", "ignore").decode("ascii")
+    name = _CAMEL_UPPER_LOWER.sub(r"\1_\2", name)
+    name = _CAMEL_LOWER_UPPER.sub(r"\1_\2", name)
+    name = name.lower()
+    name = _QUOTE_PATTERN.sub("", name)
+    name = _DISALLOWED_CHARS_PATTERN.sub("_", name)
+    name = _DUPLICATE_UNDERSCORE_PATTERN.sub("_", name)
+    name = name.strip("_")
+
+    if len(name) > max_length:
+        name = name[:max_length].rstrip("_")
+
+    return name


 def interpolate_only(
--- a/lib/crewai/src/crewai/utilities/types.py
+++ b/lib/crewai/src/crewai/utilities/types.py
@@ -2,7 +2,7 @@

 from typing import Any, Literal

-from typing_extensions import TypedDict
+from typing_extensions import NotRequired, TypedDict


 class LLMMessage(TypedDict):
@@ -15,3 +15,6 @@ class LLMMessage(TypedDict):

    role: Literal["user", "assistant", "system", "tool"]
    content: str | list[dict[str, Any]] | None
+    tool_call_id: NotRequired[str]
+    name: NotRequired[str]
+    tool_calls: NotRequired[list[dict[str, Any]]]