fix: resolve complex schema $ref pointers in mcp tools
Some checks failed
CodeQL Advanced / Analyze (actions) (push) Has been cancelled
CodeQL Advanced / Analyze (python) (push) Has been cancelled
Notify Downstream / notify-downstream (push) Has been cancelled
Mark stale issues and pull requests / stale (push) Has been cancelled

* fix: resolve complex schema $ref pointers in mcp tools

* chore: update tool specifications

* fix: adapt mcp tools; sanitize pydantic json schemas

* fix: strip nulls from json schemas and simplify mcp args

---------

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
Greyson LaLonde
2026-02-03 20:47:58 -05:00
committed by GitHub
parent 3fec4669af
commit 3cc33ef6ab
3 changed files with 411 additions and 111 deletions

View File

@@ -2,29 +2,95 @@
from __future__ import annotations
from collections.abc import Callable
import logging
from typing import TYPE_CHECKING, Any
from crewai.tools import BaseTool
from crewai.utilities.pydantic_schema_utils import create_model_from_schema
from crewai.utilities.string_utils import sanitize_tool_name
from pydantic import BaseModel
from crewai_tools.adapters.tool_collection import ToolCollection
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
from mcp import StdioServerParameters
from mcpadapt.core import MCPAdapt
from mcpadapt.crewai_adapter import CrewAIAdapter
from mcp.types import CallToolResult, TextContent, Tool
from mcpadapt.core import MCPAdapt, ToolAdapter
logger = logging.getLogger(__name__)
try:
from mcp import StdioServerParameters
from mcpadapt.core import MCPAdapt
from mcpadapt.crewai_adapter import CrewAIAdapter
from mcp.types import CallToolResult, TextContent, Tool
from mcpadapt.core import MCPAdapt, ToolAdapter
class CrewAIToolAdapter(ToolAdapter):
    """Adapter that builds CrewAI tools with normalized JSON schemas.

    Bypasses mcpadapt's own model creation (which injects invalid null
    values into field schemas) and instead derives the argument model via
    CrewAI's schema utilities.
    """

    def adapt(
        self,
        func: Callable[[dict[str, Any] | None], CallToolResult],
        mcp_tool: Tool,
    ) -> BaseTool:
        """Wrap an MCP tool definition as a CrewAI ``BaseTool``.

        Args:
            func: Callable invoked with the tool's keyword arguments,
                returning an MCP ``CallToolResult``.
            mcp_tool: The MCP tool definition to adapt.

        Returns:
            A CrewAI BaseTool instance.
        """
        adapted_name = sanitize_tool_name(mcp_tool.name)
        adapted_description = mcp_tool.description or ""
        # Build the args model from the raw MCP input schema via CrewAI's
        # own utilities rather than mcpadapt's model factory.
        schema_model = create_model_from_schema(mcp_tool.inputSchema)

        class CrewAIMCPTool(BaseTool):
            name: str = adapted_name
            description: str = adapted_description
            args_schema: type[BaseModel] = schema_model

            def _run(self, **kwargs: Any) -> Any:
                # Delegate to the MCP call and flatten its content blocks.
                result = func(kwargs)
                contents = result.content
                if len(contents) == 1:
                    sole = contents[0]
                    # Single text block: hand back the raw text.
                    return sole.text if isinstance(sole, TextContent) else str(sole)
                # Multiple blocks: keep only the textual ones, stringified.
                texts = [
                    block.text for block in contents if isinstance(block, TextContent)
                ]
                return str(texts)

            def _generate_description(self) -> None:
                # Inline the argument schema into the description, dropping
                # $defs noise that would bloat the prompt.
                schema = self.args_schema.model_json_schema()
                schema.pop("$defs", None)
                self.description = (
                    f"Tool Name: {self.name}\n"
                    f"Tool Arguments: {schema}\n"
                    f"Tool Description: {self.description}"
                )

        return CrewAIMCPTool()

    async def async_adapt(self, afunc: Any, mcp_tool: Tool) -> Any:
        """Async adaptation is not supported by CrewAI."""
        raise NotImplementedError("async is not supported by the CrewAI framework.")
MCP_AVAILABLE = True
except ImportError:
except ImportError as e:
logger.debug(f"MCP packages not available: {e}")
MCP_AVAILABLE = False
@@ -34,9 +100,6 @@ class MCPServerAdapter:
Note: tools can only be accessed after the server has been started with the
`start()` method.
Attributes:
tools: The CrewAI tools available from the MCP server.
Usage:
# context manager + stdio
with MCPServerAdapter(...) as tools:
@@ -89,7 +152,9 @@ class MCPServerAdapter:
super().__init__()
self._adapter = None
self._tools = None
self._tool_names = list(tool_names) if tool_names else None
self._tool_names = (
[sanitize_tool_name(name) for name in tool_names] if tool_names else None
)
if not MCP_AVAILABLE:
import click
@@ -100,7 +165,7 @@ class MCPServerAdapter:
import subprocess
try:
subprocess.run(["uv", "add", "mcp crewai-tools[mcp]"], check=True) # noqa: S607
subprocess.run(["uv", "add", "mcp crewai-tools'[mcp]'"], check=True) # noqa: S607
except subprocess.CalledProcessError as e:
raise ImportError("Failed to install mcp package") from e
@@ -112,7 +177,7 @@ class MCPServerAdapter:
try:
self._serverparams = serverparams
self._adapter = MCPAdapt(
self._serverparams, CrewAIAdapter(), connect_timeout
self._serverparams, CrewAIToolAdapter(), connect_timeout
)
self.start()
@@ -124,13 +189,13 @@ class MCPServerAdapter:
logger.error(f"Error during stop cleanup: {stop_e}")
raise RuntimeError(f"Failed to initialize MCP Adapter: {e}") from e
def start(self):
def start(self) -> None:
"""Start the MCP server and initialize the tools."""
self._tools = self._adapter.__enter__()
self._tools = self._adapter.__enter__() # type: ignore[union-attr]
def stop(self):
def stop(self) -> None:
"""Stop the MCP server."""
self._adapter.__exit__(None, None, None)
self._adapter.__exit__(None, None, None) # type: ignore[union-attr]
@property
def tools(self) -> ToolCollection[BaseTool]:
@@ -152,12 +217,19 @@ class MCPServerAdapter:
return tools_collection.filter_by_names(self._tool_names)
return tools_collection
def __enter__(self):
"""Enter the context manager. Note that `__init__()` already starts the MCP server.
So tools should already be available.
def __enter__(self) -> ToolCollection[BaseTool]:
"""Enter the context manager.
Note that `__init__()` already starts the MCP server,
so tools should already be available.
"""
return self.tools
def __exit__(self, exc_type, exc_value, traceback):
def __exit__(
self,
exc_type: type[BaseException] | None,
exc_value: BaseException | None,
traceback: Any,
) -> None:
"""Exit the context manager."""
return self._adapter.__exit__(exc_type, exc_value, traceback)
self._adapter.__exit__(exc_type, exc_value, traceback) # type: ignore[union-attr]

View File

@@ -197,7 +197,7 @@
}
},
{
"description": "A tool that can be used to search the internet with a search_query.",
"description": "A tool that performs web searches using the Brave Search API. Results are returned as structured JSON data.",
"env_vars": [
{
"default": null,
@@ -206,7 +206,7 @@
"required": true
}
],
"humanized_name": "Brave Web Search the internet",
"humanized_name": "Brave Search",
"init_params_schema": {
"$defs": {
"EnvVar": {
@@ -245,20 +245,8 @@
"type": "object"
}
},
"description": "BraveSearchTool - A tool for performing web searches using the Brave Search API.\n\nThis module provides functionality to search the internet using Brave's Search API,\nsupporting customizable result counts and country-specific searches.\n\nDependencies:\n - requests\n - pydantic\n - python-dotenv (for API key management)",
"description": "A tool that performs web searches using the Brave Search API.",
"properties": {
"country": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": "",
"title": "Country"
},
"n_results": {
"default": 10,
"title": "N Results",
@@ -281,16 +269,161 @@
"name": "BraveSearchTool",
"package_dependencies": [],
"run_params_schema": {
"description": "Input for BraveSearchTool.",
"description": "Input for BraveSearchTool",
"properties": {
"search_query": {
"description": "Mandatory search query you want to use to search the internet",
"title": "Search Query",
"count": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"default": null,
"description": "The maximum number of results to return. Actual number may be less.",
"title": "Count"
},
"country": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Country code for geo-targeting (e.g., 'US', 'BR').",
"title": "Country"
},
"extra_snippets": {
"anyOf": [
{
"type": "boolean"
},
{
"type": "null"
}
],
"default": null,
"description": "Include up to 5 text snippets for each page if possible.",
"title": "Extra Snippets"
},
"freshness": {
"anyOf": [
{
"enum": [
"pd",
"pw",
"pm",
"py"
],
"type": "string"
},
{
"pattern": "^\\d{4}-\\d{2}-\\d{2}to\\d{4}-\\d{2}-\\d{2}$",
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Enforce freshness of results. Options: pd/pw/pm/py, or YYYY-MM-DDtoYYYY-MM-DD",
"title": "Freshness"
},
"offset": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"default": null,
"description": "Skip the first N result sets/pages. Max is 9.",
"title": "Offset"
},
"operators": {
"anyOf": [
{
"type": "boolean"
},
{
"type": "null"
}
],
"default": null,
"description": "Whether to apply search operators (e.g., site:example.com).",
"title": "Operators"
},
"query": {
"description": "Search query to perform",
"title": "Query",
"type": "string"
},
"safesearch": {
"anyOf": [
{
"enum": [
"off",
"moderate",
"strict"
],
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Filter out explicit content. Options: off/moderate/strict",
"title": "Safesearch"
},
"search_language": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Language code for the search results (e.g., 'en', 'es').",
"title": "Search Language"
},
"spellcheck": {
"anyOf": [
{
"type": "boolean"
},
{
"type": "null"
}
],
"default": null,
"description": "Attempt to correct spelling errors in the search query.",
"title": "Spellcheck"
},
"text_decorations": {
"anyOf": [
{
"type": "boolean"
},
{
"type": "null"
}
],
"default": null,
"description": "Include markup to highlight search terms in the results.",
"title": "Text Decorations"
}
},
"required": [
"search_query"
"query"
],
"title": "BraveSearchToolSchema",
"type": "object"
@@ -3741,10 +3874,6 @@
"title": "Bucket Name",
"type": "string"
},
"cluster": {
"description": "An instance of the Couchbase Cluster connected to the desired Couchbase server.",
"title": "Cluster"
},
"collection_name": {
"description": "The name of the Couchbase collection to search",
"title": "Collection Name",
@@ -3793,7 +3922,6 @@
}
},
"required": [
"cluster",
"collection_name",
"scope_name",
"bucket_name",
@@ -12537,13 +12665,9 @@
"properties": {
"config": {
"$ref": "#/$defs/OxylabsAmazonProductScraperConfig"
},
"oxylabs_api": {
"title": "Oxylabs Api"
}
},
"required": [
"oxylabs_api",
"config"
],
"title": "OxylabsAmazonProductScraperTool",
@@ -12766,13 +12890,9 @@
"properties": {
"config": {
"$ref": "#/$defs/OxylabsAmazonSearchScraperConfig"
},
"oxylabs_api": {
"title": "Oxylabs Api"
}
},
"required": [
"oxylabs_api",
"config"
],
"title": "OxylabsAmazonSearchScraperTool",
@@ -13008,13 +13128,9 @@
"properties": {
"config": {
"$ref": "#/$defs/OxylabsGoogleSearchScraperConfig"
},
"oxylabs_api": {
"title": "Oxylabs Api"
}
},
"required": [
"oxylabs_api",
"config"
],
"title": "OxylabsGoogleSearchScraperTool",
@@ -13198,13 +13314,9 @@
"properties": {
"config": {
"$ref": "#/$defs/OxylabsUniversalScraperConfig"
},
"oxylabs_api": {
"title": "Oxylabs Api"
}
},
"required": [
"oxylabs_api",
"config"
],
"title": "OxylabsUniversalScraperTool",
@@ -20005,6 +20117,18 @@
"humanized_name": "Web Automation Tool",
"init_params_schema": {
"$defs": {
"AvailableModel": {
"enum": [
"gpt-4o",
"gpt-4o-mini",
"claude-3-5-sonnet-latest",
"claude-3-7-sonnet-latest",
"computer-use-preview",
"gemini-2.0-flash"
],
"title": "AvailableModel",
"type": "string"
},
"EnvVar": {
"properties": {
"default": {
@@ -20082,6 +20206,17 @@
"default": null,
"title": "Model Api Key"
},
"model_name": {
"anyOf": [
{
"$ref": "#/$defs/AvailableModel"
},
{
"type": "null"
}
],
"default": "claude-3-7-sonnet-latest"
},
"project_id": {
"anyOf": [
{
@@ -21306,26 +21441,6 @@
"description": "The Tavily API key. If not provided, it will be loaded from the environment variable TAVILY_API_KEY.",
"title": "Api Key"
},
"async_client": {
"anyOf": [
{},
{
"type": "null"
}
],
"default": null,
"title": "Async Client"
},
"client": {
"anyOf": [
{},
{
"type": "null"
}
],
"default": null,
"title": "Client"
},
"extract_depth": {
"default": "basic",
"description": "The depth of extraction. 'basic' for basic extraction, 'advanced' for advanced extraction.",
@@ -21461,26 +21576,6 @@
"description": "The Tavily API key. If not provided, it will be loaded from the environment variable TAVILY_API_KEY.",
"title": "Api Key"
},
"async_client": {
"anyOf": [
{},
{
"type": "null"
}
],
"default": null,
"title": "Async Client"
},
"client": {
"anyOf": [
{},
{
"type": "null"
}
],
"default": null,
"title": "Client"
},
"days": {
"default": 7,
"description": "The number of days to search back.",

View File

@@ -19,9 +19,10 @@ from collections.abc import Callable
from copy import deepcopy
import datetime
import logging
from typing import TYPE_CHECKING, Annotated, Any, Literal, Union
from typing import TYPE_CHECKING, Annotated, Any, Final, Literal, TypedDict, Union
import uuid
import jsonref # type: ignore[import-untyped]
from pydantic import (
UUID1,
UUID3,
@@ -69,6 +70,21 @@ else:
EmailStr = str
class JsonSchemaInfo(TypedDict):
    """Inner structure for JSON schema metadata."""

    # Model name advertised to the consumer — presumably the sanitized
    # Pydantic model title; confirm against the producer.
    name: str
    # Pinned to the literal True by the type: the emitted schema is
    # always flagged as strict.
    strict: Literal[True]
    # The post-processed JSON schema dictionary itself.
    schema: dict[str, Any]
class ModelDescription(TypedDict):
    """Return type for generate_model_description."""

    # Discriminator fixed to the literal "json_schema" by the type.
    type: Literal["json_schema"]
    # Nested schema payload; structure described by JsonSchemaInfo.
    json_schema: JsonSchemaInfo
def resolve_refs(schema: dict[str, Any]) -> dict[str, Any]:
"""Recursively resolve all local $refs in the given JSON Schema using $defs as the source.
@@ -157,6 +173,72 @@ def force_additional_properties_false(d: Any) -> Any:
return d
OPENAI_SUPPORTED_FORMATS: Final[
    set[Literal["date-time", "date", "time", "duration"]]
] = {"date-time", "date", "time", "duration"}


def strip_unsupported_formats(d: Any) -> Any:
    """Remove ``format`` annotations that OpenAI strict mode rejects.

    OpenAI only supports the temporal formats date-time, date, time and
    duration; any other format (uri, email, uuid, ...) triggers validation
    errors, so those keys are deleted in place. The schema tree is walked
    recursively through both dicts and lists.

    Args:
        d: The dictionary/list to modify.

    Returns:
        The modified dictionary/list (same object, mutated in place).
    """
    if isinstance(d, list):
        for entry in d:
            strip_unsupported_formats(entry)
        return d
    if not isinstance(d, dict):
        return d
    fmt = d.get("format")
    if isinstance(fmt, str) and fmt not in OPENAI_SUPPORTED_FORMATS:
        del d["format"]
    for child in d.values():
        strip_unsupported_formats(child)
    return d
def ensure_type_in_schemas(d: Any) -> Any:
    """Ensure every schema object inside anyOf/oneOf carries a 'type' key.

    OpenAI strict mode requires each schema to declare a 'type'; empty
    schemas ``{}`` appearing in anyOf/oneOf lists are rewritten to
    ``{"type": "object"}``. All other values are visited recursively.

    Args:
        d: The dictionary/list to modify.

    Returns:
        The modified dictionary/list (same object, mutated in place).
    """
    if isinstance(d, list):
        for entry in d:
            ensure_type_in_schemas(entry)
        return d
    if not isinstance(d, dict):
        return d
    for union_key in ("anyOf", "oneOf"):
        if union_key not in d:
            continue
        variants = d[union_key]
        for idx, variant in enumerate(variants):
            if isinstance(variant, dict) and not variant:
                # Empty schema: give it an explicit object type.
                variants[idx] = {"type": "object"}
            else:
                ensure_type_in_schemas(variant)
    for child in d.values():
        ensure_type_in_schemas(child)
    return d
def fix_discriminator_mappings(schema: dict[str, Any]) -> dict[str, Any]:
"""Replace '#/$defs/...' references in discriminator.mapping with just the model name.
@@ -293,7 +375,49 @@ def ensure_all_properties_required(schema: dict[str, Any]) -> dict[str, Any]:
return schema
def generate_model_description(model: type[BaseModel]) -> dict[str, Any]:
def strip_null_from_types(schema: dict[str, Any]) -> dict[str, Any]:
    """Remove the null type from anyOf unions and type arrays.

    Pydantic renders ``T | None`` as a schema containing a null branch, but
    MCP tools expect optional fields to be omitted rather than sent as null,
    so null members are stripped in place. An anyOf that collapses to a
    single non-null option is inlined into the parent schema.

    Args:
        schema: JSON schema dictionary.

    Returns:
        Modified schema with null types removed (same object, mutated).
    """
    if not isinstance(schema, dict):
        return schema
    if "anyOf" in schema:
        kept = [opt for opt in schema["anyOf"] if opt.get("type") != "null"]
        if len(kept) == 1:
            # Single surviving branch: merge it into the parent schema.
            schema.pop("anyOf")
            schema.update(kept[0])
        elif len(kept) > 1:
            schema["anyOf"] = kept
        # len(kept) == 0: leave the (all-null) anyOf untouched.
    declared = schema.get("type")
    if isinstance(declared, list) and "null" in declared:
        remaining = [t for t in declared if t != "null"]
        if len(remaining) == 1:
            schema["type"] = remaining[0]
        elif len(remaining) > 1:
            schema["type"] = remaining
    # Recurse after mutation so inlined branches are visited too.
    for child in schema.values():
        if isinstance(child, dict):
            strip_null_from_types(child)
        elif isinstance(child, list):
            for entry in child:
                if isinstance(entry, dict):
                    strip_null_from_types(entry)
    return schema
def generate_model_description(model: type[BaseModel]) -> ModelDescription:
"""Generate JSON schema description of a Pydantic model.
This function takes a Pydantic model class and returns its JSON schema,
@@ -304,11 +428,13 @@ def generate_model_description(model: type[BaseModel]) -> dict[str, Any]:
model: A Pydantic model class.
Returns:
A JSON schema dictionary representation of the model.
A ModelDescription with JSON schema representation of the model.
"""
json_schema = model.model_json_schema(ref_template="#/$defs/{model}")
json_schema = force_additional_properties_false(json_schema)
json_schema = strip_unsupported_formats(json_schema)
json_schema = ensure_type_in_schemas(json_schema)
json_schema = resolve_refs(json_schema)
@@ -316,6 +442,7 @@ def generate_model_description(model: type[BaseModel]) -> dict[str, Any]:
json_schema = fix_discriminator_mappings(json_schema)
json_schema = convert_oneof_to_anyof(json_schema)
json_schema = ensure_all_properties_required(json_schema)
json_schema = strip_null_from_types(json_schema)
return {
"type": "json_schema",
@@ -400,6 +527,8 @@ def create_model_from_schema( # type: ignore[no-any-unimported]
>>> person.name
'John'
"""
json_schema = dict(jsonref.replace_refs(json_schema, proxies=False))
effective_root = root_schema or json_schema
json_schema = force_additional_properties_false(json_schema)
@@ -410,7 +539,7 @@ def create_model_from_schema( # type: ignore[no-any-unimported]
if "title" not in json_schema and "title" in (root_schema or {}):
json_schema["title"] = (root_schema or {}).get("title")
model_name = json_schema.get("title", "DynamicModel")
model_name = json_schema.get("title") or "DynamicModel"
field_definitions = {
name: _json_schema_to_pydantic_field(
name, prop, json_schema.get("required", []), effective_root
@@ -418,9 +547,11 @@ def create_model_from_schema( # type: ignore[no-any-unimported]
for name, prop in (json_schema.get("properties", {}) or {}).items()
}
effective_config = __config__ or ConfigDict(extra="forbid")
return create_model_base(
model_name,
__config__=__config__,
__config__=effective_config,
__base__=__base__,
__module__=__module__,
__validators__=__validators__,
@@ -599,8 +730,10 @@ def _json_schema_to_pydantic_type(
any_of_schemas = json_schema.get("anyOf", []) + json_schema.get("oneOf", [])
if any_of_schemas:
any_of_types = [
_json_schema_to_pydantic_type(schema, root_schema)
for schema in any_of_schemas
_json_schema_to_pydantic_type(
schema, root_schema, name_=f"{name_ or 'Union'}Option{i}"
)
for i, schema in enumerate(any_of_schemas)
]
return Union[tuple(any_of_types)] # noqa: UP007
@@ -636,7 +769,7 @@ def _json_schema_to_pydantic_type(
if properties:
json_schema_ = json_schema.copy()
if json_schema_.get("title") is None:
json_schema_["title"] = name_
json_schema_["title"] = name_ or "DynamicModel"
return create_model_from_schema(json_schema_, root_schema=root_schema)
return dict
if type_ == "null":