fix: resolve complex schema $ref pointers in mcp tools
Some checks failed
CodeQL Advanced / Analyze (actions) (push) Has been cancelled
CodeQL Advanced / Analyze (python) (push) Has been cancelled
Notify Downstream / notify-downstream (push) Has been cancelled
Mark stale issues and pull requests / stale (push) Has been cancelled

* fix: resolve complex schema $ref pointers in mcp tools

* chore: update tool specifications

* fix: adapt mcp tools; sanitize pydantic json schemas

* fix: strip nulls from json schemas and simplify mcp args

---------

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
Greyson LaLonde
2026-02-03 20:47:58 -05:00
committed by GitHub
parent 3fec4669af
commit 3cc33ef6ab
3 changed files with 411 additions and 111 deletions

View File

@@ -2,29 +2,95 @@
from __future__ import annotations
from collections.abc import Callable
import logging
from typing import TYPE_CHECKING, Any
from crewai.tools import BaseTool
from crewai.utilities.pydantic_schema_utils import create_model_from_schema
from crewai.utilities.string_utils import sanitize_tool_name
from pydantic import BaseModel
from crewai_tools.adapters.tool_collection import ToolCollection
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
from mcp import StdioServerParameters
from mcpadapt.core import MCPAdapt
from mcpadapt.crewai_adapter import CrewAIAdapter
from mcp.types import CallToolResult, TextContent, Tool
from mcpadapt.core import MCPAdapt, ToolAdapter
logger = logging.getLogger(__name__)
try:
from mcp import StdioServerParameters
from mcpadapt.core import MCPAdapt
from mcpadapt.crewai_adapter import CrewAIAdapter
from mcp.types import CallToolResult, TextContent, Tool
from mcpadapt.core import MCPAdapt, ToolAdapter
class CrewAIToolAdapter(ToolAdapter):
    """Adapter that builds CrewAI tools with normalized JSON schemas.

    Bypasses mcpadapt's own model creation (which injects invalid null
    values into field schemas) and instead derives the argument model via
    CrewAI's schema utilities.
    """

    def adapt(
        self,
        func: Callable[[dict[str, Any] | None], CallToolResult],
        mcp_tool: Tool,
    ) -> BaseTool:
        """Wrap an MCP tool definition as a CrewAI ``BaseTool``.

        Args:
            func: Callable invoked with the tool's keyword arguments,
                returning an MCP ``CallToolResult``.
            mcp_tool: The MCP tool definition to adapt.

        Returns:
            A CrewAI BaseTool instance.
        """
        adapted_name = sanitize_tool_name(mcp_tool.name)
        adapted_description = mcp_tool.description or ""
        # Build the args model from the raw MCP input schema via CrewAI's
        # own utilities rather than mcpadapt's model factory.
        schema_model = create_model_from_schema(mcp_tool.inputSchema)

        class CrewAIMCPTool(BaseTool):
            name: str = adapted_name
            description: str = adapted_description
            args_schema: type[BaseModel] = schema_model

            def _run(self, **kwargs: Any) -> Any:
                # Delegate to the MCP call and flatten its content blocks.
                result = func(kwargs)
                contents = result.content
                if len(contents) == 1:
                    sole = contents[0]
                    # Single text block: hand back the raw text.
                    return sole.text if isinstance(sole, TextContent) else str(sole)
                # Multiple blocks: keep only the textual ones, stringified.
                texts = [
                    block.text for block in contents if isinstance(block, TextContent)
                ]
                return str(texts)

            def _generate_description(self) -> None:
                # Inline the argument schema into the description, dropping
                # $defs noise that would bloat the prompt.
                schema = self.args_schema.model_json_schema()
                schema.pop("$defs", None)
                self.description = (
                    f"Tool Name: {self.name}\n"
                    f"Tool Arguments: {schema}\n"
                    f"Tool Description: {self.description}"
                )

        return CrewAIMCPTool()

    async def async_adapt(self, afunc: Any, mcp_tool: Tool) -> Any:
        """Async adaptation is not supported by CrewAI."""
        raise NotImplementedError("async is not supported by the CrewAI framework.")
MCP_AVAILABLE = True
except ImportError:
except ImportError as e:
logger.debug(f"MCP packages not available: {e}")
MCP_AVAILABLE = False
@@ -34,9 +100,6 @@ class MCPServerAdapter:
Note: tools can only be accessed after the server has been started with the
`start()` method.
Attributes:
tools: The CrewAI tools available from the MCP server.
Usage:
# context manager + stdio
with MCPServerAdapter(...) as tools:
@@ -89,7 +152,9 @@ class MCPServerAdapter:
super().__init__()
self._adapter = None
self._tools = None
self._tool_names = list(tool_names) if tool_names else None
self._tool_names = (
[sanitize_tool_name(name) for name in tool_names] if tool_names else None
)
if not MCP_AVAILABLE:
import click
@@ -100,7 +165,7 @@ class MCPServerAdapter:
import subprocess
try:
subprocess.run(["uv", "add", "mcp crewai-tools[mcp]"], check=True) # noqa: S607
subprocess.run(["uv", "add", "mcp crewai-tools'[mcp]'"], check=True) # noqa: S607
except subprocess.CalledProcessError as e:
raise ImportError("Failed to install mcp package") from e
@@ -112,7 +177,7 @@ class MCPServerAdapter:
try:
self._serverparams = serverparams
self._adapter = MCPAdapt(
self._serverparams, CrewAIAdapter(), connect_timeout
self._serverparams, CrewAIToolAdapter(), connect_timeout
)
self.start()
@@ -124,13 +189,13 @@ class MCPServerAdapter:
logger.error(f"Error during stop cleanup: {stop_e}")
raise RuntimeError(f"Failed to initialize MCP Adapter: {e}") from e
def start(self):
def start(self) -> None:
"""Start the MCP server and initialize the tools."""
self._tools = self._adapter.__enter__()
self._tools = self._adapter.__enter__() # type: ignore[union-attr]
def stop(self):
def stop(self) -> None:
"""Stop the MCP server."""
self._adapter.__exit__(None, None, None)
self._adapter.__exit__(None, None, None) # type: ignore[union-attr]
@property
def tools(self) -> ToolCollection[BaseTool]:
@@ -152,12 +217,19 @@ class MCPServerAdapter:
return tools_collection.filter_by_names(self._tool_names)
return tools_collection
def __enter__(self):
"""Enter the context manager. Note that `__init__()` already starts the MCP server.
So tools should already be available.
def __enter__(self) -> ToolCollection[BaseTool]:
"""Enter the context manager.
Note that `__init__()` already starts the MCP server,
so tools should already be available.
"""
return self.tools
def __exit__(self, exc_type, exc_value, traceback):
def __exit__(
self,
exc_type: type[BaseException] | None,
exc_value: BaseException | None,
traceback: Any,
) -> None:
"""Exit the context manager."""
return self._adapter.__exit__(exc_type, exc_value, traceback)
self._adapter.__exit__(exc_type, exc_value, traceback) # type: ignore[union-attr]

View File

@@ -197,7 +197,7 @@
}
},
{
"description": "A tool that can be used to search the internet with a search_query.",
"description": "A tool that performs web searches using the Brave Search API. Results are returned as structured JSON data.",
"env_vars": [
{
"default": null,
@@ -206,7 +206,7 @@
"required": true
}
],
"humanized_name": "Brave Web Search the internet",
"humanized_name": "Brave Search",
"init_params_schema": {
"$defs": {
"EnvVar": {
@@ -245,20 +245,8 @@
"type": "object"
}
},
"description": "BraveSearchTool - A tool for performing web searches using the Brave Search API.\n\nThis module provides functionality to search the internet using Brave's Search API,\nsupporting customizable result counts and country-specific searches.\n\nDependencies:\n - requests\n - pydantic\n - python-dotenv (for API key management)",
"description": "A tool that performs web searches using the Brave Search API.",
"properties": {
"country": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": "",
"title": "Country"
},
"n_results": {
"default": 10,
"title": "N Results",
@@ -281,16 +269,161 @@
"name": "BraveSearchTool",
"package_dependencies": [],
"run_params_schema": {
"description": "Input for BraveSearchTool.",
"description": "Input for BraveSearchTool",
"properties": {
"search_query": {
"description": "Mandatory search query you want to use to search the internet",
"title": "Search Query",
"count": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"default": null,
"description": "The maximum number of results to return. Actual number may be less.",
"title": "Count"
},
"country": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Country code for geo-targeting (e.g., 'US', 'BR').",
"title": "Country"
},
"extra_snippets": {
"anyOf": [
{
"type": "boolean"
},
{
"type": "null"
}
],
"default": null,
"description": "Include up to 5 text snippets for each page if possible.",
"title": "Extra Snippets"
},
"freshness": {
"anyOf": [
{
"enum": [
"pd",
"pw",
"pm",
"py"
],
"type": "string"
},
{
"pattern": "^\\d{4}-\\d{2}-\\d{2}to\\d{4}-\\d{2}-\\d{2}$",
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Enforce freshness of results. Options: pd/pw/pm/py, or YYYY-MM-DDtoYYYY-MM-DD",
"title": "Freshness"
},
"offset": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"default": null,
"description": "Skip the first N result sets/pages. Max is 9.",
"title": "Offset"
},
"operators": {
"anyOf": [
{
"type": "boolean"
},
{
"type": "null"
}
],
"default": null,
"description": "Whether to apply search operators (e.g., site:example.com).",
"title": "Operators"
},
"query": {
"description": "Search query to perform",
"title": "Query",
"type": "string"
},
"safesearch": {
"anyOf": [
{
"enum": [
"off",
"moderate",
"strict"
],
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Filter out explicit content. Options: off/moderate/strict",
"title": "Safesearch"
},
"search_language": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Language code for the search results (e.g., 'en', 'es').",
"title": "Search Language"
},
"spellcheck": {
"anyOf": [
{
"type": "boolean"
},
{
"type": "null"
}
],
"default": null,
"description": "Attempt to correct spelling errors in the search query.",
"title": "Spellcheck"
},
"text_decorations": {
"anyOf": [
{
"type": "boolean"
},
{
"type": "null"
}
],
"default": null,
"description": "Include markup to highlight search terms in the results.",
"title": "Text Decorations"
}
},
"required": [
"search_query"
"query"
],
"title": "BraveSearchToolSchema",
"type": "object"
@@ -3741,10 +3874,6 @@
"title": "Bucket Name",
"type": "string"
},
"cluster": {
"description": "An instance of the Couchbase Cluster connected to the desired Couchbase server.",
"title": "Cluster"
},
"collection_name": {
"description": "The name of the Couchbase collection to search",
"title": "Collection Name",
@@ -3793,7 +3922,6 @@
}
},
"required": [
"cluster",
"collection_name",
"scope_name",
"bucket_name",
@@ -12537,13 +12665,9 @@
"properties": {
"config": {
"$ref": "#/$defs/OxylabsAmazonProductScraperConfig"
},
"oxylabs_api": {
"title": "Oxylabs Api"
}
},
"required": [
"oxylabs_api",
"config"
],
"title": "OxylabsAmazonProductScraperTool",
@@ -12766,13 +12890,9 @@
"properties": {
"config": {
"$ref": "#/$defs/OxylabsAmazonSearchScraperConfig"
},
"oxylabs_api": {
"title": "Oxylabs Api"
}
},
"required": [
"oxylabs_api",
"config"
],
"title": "OxylabsAmazonSearchScraperTool",
@@ -13008,13 +13128,9 @@
"properties": {
"config": {
"$ref": "#/$defs/OxylabsGoogleSearchScraperConfig"
},
"oxylabs_api": {
"title": "Oxylabs Api"
}
},
"required": [
"oxylabs_api",
"config"
],
"title": "OxylabsGoogleSearchScraperTool",
@@ -13198,13 +13314,9 @@
"properties": {
"config": {
"$ref": "#/$defs/OxylabsUniversalScraperConfig"
},
"oxylabs_api": {
"title": "Oxylabs Api"
}
},
"required": [
"oxylabs_api",
"config"
],
"title": "OxylabsUniversalScraperTool",
@@ -20005,6 +20117,18 @@
"humanized_name": "Web Automation Tool",
"init_params_schema": {
"$defs": {
"AvailableModel": {
"enum": [
"gpt-4o",
"gpt-4o-mini",
"claude-3-5-sonnet-latest",
"claude-3-7-sonnet-latest",
"computer-use-preview",
"gemini-2.0-flash"
],
"title": "AvailableModel",
"type": "string"
},
"EnvVar": {
"properties": {
"default": {
@@ -20082,6 +20206,17 @@
"default": null,
"title": "Model Api Key"
},
"model_name": {
"anyOf": [
{
"$ref": "#/$defs/AvailableModel"
},
{
"type": "null"
}
],
"default": "claude-3-7-sonnet-latest"
},
"project_id": {
"anyOf": [
{
@@ -21306,26 +21441,6 @@
"description": "The Tavily API key. If not provided, it will be loaded from the environment variable TAVILY_API_KEY.",
"title": "Api Key"
},
"async_client": {
"anyOf": [
{},
{
"type": "null"
}
],
"default": null,
"title": "Async Client"
},
"client": {
"anyOf": [
{},
{
"type": "null"
}
],
"default": null,
"title": "Client"
},
"extract_depth": {
"default": "basic",
"description": "The depth of extraction. 'basic' for basic extraction, 'advanced' for advanced extraction.",
@@ -21461,26 +21576,6 @@
"description": "The Tavily API key. If not provided, it will be loaded from the environment variable TAVILY_API_KEY.",
"title": "Api Key"
},
"async_client": {
"anyOf": [
{},
{
"type": "null"
}
],
"default": null,
"title": "Async Client"
},
"client": {
"anyOf": [
{},
{
"type": "null"
}
],
"default": null,
"title": "Client"
},
"days": {
"default": 7,
"description": "The number of days to search back.",

View File

@@ -19,9 +19,10 @@ from collections.abc import Callable
from copy import deepcopy
import datetime
import logging
from typing import TYPE_CHECKING, Annotated, Any, Literal, Union
from typing import TYPE_CHECKING, Annotated, Any, Final, Literal, TypedDict, Union
import uuid
import jsonref # type: ignore[import-untyped]
from pydantic import (
UUID1,
UUID3,
@@ -69,6 +70,21 @@ else:
EmailStr = str
class JsonSchemaInfo(TypedDict):
    """Inner structure for JSON schema metadata."""

    # Model name advertised to the consumer — presumably the sanitized
    # Pydantic model title; confirm against the producer.
    name: str
    # Pinned to the literal True by the type: the emitted schema is
    # always flagged as strict.
    strict: Literal[True]
    # The post-processed JSON schema dictionary itself.
    schema: dict[str, Any]
class ModelDescription(TypedDict):
    """Return type for generate_model_description."""

    # Discriminator fixed to the literal "json_schema" by the type.
    type: Literal["json_schema"]
    # Nested schema payload; structure described by JsonSchemaInfo.
    json_schema: JsonSchemaInfo
def resolve_refs(schema: dict[str, Any]) -> dict[str, Any]:
"""Recursively resolve all local $refs in the given JSON Schema using $defs as the source.
@@ -157,6 +173,72 @@ def force_additional_properties_false(d: Any) -> Any:
return d
OPENAI_SUPPORTED_FORMATS: Final[
    set[Literal["date-time", "date", "time", "duration"]]
] = {"date-time", "date", "time", "duration"}


def strip_unsupported_formats(d: Any) -> Any:
    """Remove ``format`` annotations that OpenAI strict mode rejects.

    OpenAI only supports the temporal formats date-time, date, time and
    duration; any other format (uri, email, uuid, ...) triggers validation
    errors, so those keys are deleted in place. The schema tree is walked
    recursively through both dicts and lists.

    Args:
        d: The dictionary/list to modify.

    Returns:
        The modified dictionary/list (same object, mutated in place).
    """
    if isinstance(d, list):
        for entry in d:
            strip_unsupported_formats(entry)
        return d
    if not isinstance(d, dict):
        return d
    fmt = d.get("format")
    if isinstance(fmt, str) and fmt not in OPENAI_SUPPORTED_FORMATS:
        del d["format"]
    for child in d.values():
        strip_unsupported_formats(child)
    return d
def ensure_type_in_schemas(d: Any) -> Any:
    """Ensure every schema object inside anyOf/oneOf carries a 'type' key.

    OpenAI strict mode requires each schema to declare a 'type'; empty
    schemas ``{}`` appearing in anyOf/oneOf lists are rewritten to
    ``{"type": "object"}``. All other values are visited recursively.

    Args:
        d: The dictionary/list to modify.

    Returns:
        The modified dictionary/list (same object, mutated in place).
    """
    if isinstance(d, list):
        for entry in d:
            ensure_type_in_schemas(entry)
        return d
    if not isinstance(d, dict):
        return d
    for union_key in ("anyOf", "oneOf"):
        if union_key not in d:
            continue
        variants = d[union_key]
        for idx, variant in enumerate(variants):
            if isinstance(variant, dict) and not variant:
                # Empty schema: give it an explicit object type.
                variants[idx] = {"type": "object"}
            else:
                ensure_type_in_schemas(variant)
    for child in d.values():
        ensure_type_in_schemas(child)
    return d
def fix_discriminator_mappings(schema: dict[str, Any]) -> dict[str, Any]:
"""Replace '#/$defs/...' references in discriminator.mapping with just the model name.
@@ -293,7 +375,49 @@ def ensure_all_properties_required(schema: dict[str, Any]) -> dict[str, Any]:
return schema
def generate_model_description(model: type[BaseModel]) -> dict[str, Any]:
def strip_null_from_types(schema: dict[str, Any]) -> dict[str, Any]:
    """Remove the null type from anyOf unions and type arrays.

    Pydantic renders ``T | None`` as a schema containing a null branch, but
    MCP tools expect optional fields to be omitted rather than sent as null,
    so null members are stripped in place. An anyOf that collapses to a
    single non-null option is inlined into the parent schema.

    Args:
        schema: JSON schema dictionary.

    Returns:
        Modified schema with null types removed (same object, mutated).
    """
    if not isinstance(schema, dict):
        return schema
    if "anyOf" in schema:
        kept = [opt for opt in schema["anyOf"] if opt.get("type") != "null"]
        if len(kept) == 1:
            # Single surviving branch: merge it into the parent schema.
            schema.pop("anyOf")
            schema.update(kept[0])
        elif len(kept) > 1:
            schema["anyOf"] = kept
        # len(kept) == 0: leave the (all-null) anyOf untouched.
    declared = schema.get("type")
    if isinstance(declared, list) and "null" in declared:
        remaining = [t for t in declared if t != "null"]
        if len(remaining) == 1:
            schema["type"] = remaining[0]
        elif len(remaining) > 1:
            schema["type"] = remaining
    # Recurse after mutation so inlined branches are visited too.
    for child in schema.values():
        if isinstance(child, dict):
            strip_null_from_types(child)
        elif isinstance(child, list):
            for entry in child:
                if isinstance(entry, dict):
                    strip_null_from_types(entry)
    return schema
def generate_model_description(model: type[BaseModel]) -> ModelDescription:
"""Generate JSON schema description of a Pydantic model.
This function takes a Pydantic model class and returns its JSON schema,
@@ -304,11 +428,13 @@ def generate_model_description(model: type[BaseModel]) -> dict[str, Any]:
model: A Pydantic model class.
Returns:
A JSON schema dictionary representation of the model.
A ModelDescription with JSON schema representation of the model.
"""
json_schema = model.model_json_schema(ref_template="#/$defs/{model}")
json_schema = force_additional_properties_false(json_schema)
json_schema = strip_unsupported_formats(json_schema)
json_schema = ensure_type_in_schemas(json_schema)
json_schema = resolve_refs(json_schema)
@@ -316,6 +442,7 @@ def generate_model_description(model: type[BaseModel]) -> dict[str, Any]:
json_schema = fix_discriminator_mappings(json_schema)
json_schema = convert_oneof_to_anyof(json_schema)
json_schema = ensure_all_properties_required(json_schema)
json_schema = strip_null_from_types(json_schema)
return {
"type": "json_schema",
@@ -400,6 +527,8 @@ def create_model_from_schema( # type: ignore[no-any-unimported]
>>> person.name
'John'
"""
json_schema = dict(jsonref.replace_refs(json_schema, proxies=False))
effective_root = root_schema or json_schema
json_schema = force_additional_properties_false(json_schema)
@@ -410,7 +539,7 @@ def create_model_from_schema( # type: ignore[no-any-unimported]
if "title" not in json_schema and "title" in (root_schema or {}):
json_schema["title"] = (root_schema or {}).get("title")
model_name = json_schema.get("title", "DynamicModel")
model_name = json_schema.get("title") or "DynamicModel"
field_definitions = {
name: _json_schema_to_pydantic_field(
name, prop, json_schema.get("required", []), effective_root
@@ -418,9 +547,11 @@ def create_model_from_schema( # type: ignore[no-any-unimported]
for name, prop in (json_schema.get("properties", {}) or {}).items()
}
effective_config = __config__ or ConfigDict(extra="forbid")
return create_model_base(
model_name,
__config__=__config__,
__config__=effective_config,
__base__=__base__,
__module__=__module__,
__validators__=__validators__,
@@ -599,8 +730,10 @@ def _json_schema_to_pydantic_type(
any_of_schemas = json_schema.get("anyOf", []) + json_schema.get("oneOf", [])
if any_of_schemas:
any_of_types = [
_json_schema_to_pydantic_type(schema, root_schema)
for schema in any_of_schemas
_json_schema_to_pydantic_type(
schema, root_schema, name_=f"{name_ or 'Union'}Option{i}"
)
for i, schema in enumerate(any_of_schemas)
]
return Union[tuple(any_of_types)] # noqa: UP007
@@ -636,7 +769,7 @@ def _json_schema_to_pydantic_type(
if properties:
json_schema_ = json_schema.copy()
if json_schema_.get("title") is None:
json_schema_["title"] = name_
json_schema_["title"] = name_ or "DynamicModel"
return create_model_from_schema(json_schema_, root_schema=root_schema)
return dict
if type_ == "null":