fix: handle circular $ref in MCP tool JSON schemas (#5474)

MCP servers exposing self-referential JSON schemas (e.g. ms-365-mcp-server
with >10 tools) triggered 'maximum recursion depth exceeded' because:

1. jsonref.replace_refs(proxies=False) infinitely inlines circular $refs
2. Downstream recursive visitors (force_additional_properties_false, etc.)
   loop on the resulting circular Python dicts
3. resolve_refs and _json_schema_to_pydantic_type had no cycle detection

Fix:
- Add _has_circular_refs() to detect circular $ref chains
- Add _break_circular_refs() to replace circular refs with {type: object} stubs
- Wrap jsonref.replace_refs in _safe_replace_refs() that breaks cycles first
- Add cycle detection to resolve_refs() using a resolving-set parameter
- Add cycle detection to _json_schema_to_pydantic_type() via _resolving_refs

Tests added for all new helpers and end-to-end circular schema scenarios.

Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
Devin AI
2026-04-15 19:03:12 +00:00
parent 1c90d574ab
commit ae09793712
2 changed files with 452 additions and 12 deletions

View File

@@ -91,6 +91,9 @@ def resolve_refs(schema: dict[str, Any]) -> dict[str, Any]:
This is needed because Pydantic generates $ref-based schemas that
some consumers (e.g. LLMs, tool frameworks) don't handle well.
Circular references are detected and replaced with a plain
``{"type": "object"}`` stub to prevent infinite recursion.
Args:
schema: JSON Schema dict that may contain "$refs" and "$defs".
@@ -100,18 +103,23 @@ def resolve_refs(schema: dict[str, Any]) -> dict[str, Any]:
defs = schema.get("$defs", {})
schema_copy = deepcopy(schema)
def _resolve(node: Any) -> Any:
def _resolve(node: Any, resolving: frozenset[str] = frozenset()) -> Any:
if isinstance(node, dict):
ref = node.get("$ref")
if isinstance(ref, str) and ref.startswith("#/$defs/"):
def_name = ref.replace("#/$defs/", "")
if def_name in resolving:
return {"type": "object"}
if def_name in defs:
return _resolve(deepcopy(defs[def_name]))
return _resolve(
deepcopy(defs[def_name]),
resolving | {def_name},
)
raise KeyError(f"Definition '{def_name}' not found in $defs.")
return {k: _resolve(v) for k, v in node.items()}
return {k: _resolve(v, resolving) for k, v in node.items()}
if isinstance(node, list):
return [_resolve(i) for i in node]
return [_resolve(i, resolving) for i in node]
return node
@@ -658,6 +666,93 @@ def build_rich_field_description(prop_schema: dict[str, Any]) -> str:
return ". ".join(parts) if parts else ""
# Module-level set of the ``$ref`` strings that are currently being converted
# to Pydantic types. ``_json_schema_to_pydantic_type`` adds a ref before
# resolving it and discards it afterwards; seeing a ref that is already in the
# set means a circular ``$ref`` chain, which is broken with a ``dict`` fallback.
# NOTE(review): this is a plain module-global ``set``, not a
# ``threading.local`` -- every thread in the process shares it, so concurrent
# conversions could observe each other's in-flight refs. Confirm callers are
# single-threaded or move this to thread-/context-local state.
_resolving_refs: set[str] = set()
def _safe_replace_refs(json_schema: dict[str, Any]) -> dict[str, Any]:
    """Inline ``$ref`` pointers of *json_schema* without choking on cycles.

    The input is never modified: all work happens on a deep copy. Any
    circular ``#/$defs/...`` chain found in that copy is replaced by a
    ``{"type": "object"}`` stub *before* ``jsonref.replace_refs`` runs, so
    the eager ``proxies=False`` inlining only ever sees an acyclic schema.

    Args:
        json_schema: JSON Schema dict that may contain ``$ref`` and ``$defs``.

    Returns:
        A new dict with references inlined by ``jsonref``. If ``jsonref``
        still raises ``RecursionError``, the cycle-patched copy is returned
        with its remaining ``$ref`` pointers left in place.
    """
    patched = deepcopy(json_schema)
    definitions = patched.get("$defs", {})

    # Only pay for the in-place rewrite when a cycle is actually present.
    if definitions and _has_circular_refs(patched, definitions):
        _break_circular_refs(patched, definitions, set())

    try:
        inlined = dict(jsonref.replace_refs(patched, proxies=False))
    except RecursionError:
        # Last resort - hand back the manually patched copy untouched.
        return patched
    return inlined
def _has_circular_refs(
node: Any,
defs: dict[str, Any],
visiting: set[str] | None = None,
) -> bool:
"""Return ``True`` if *node* contains any circular ``$ref`` chain."""
if visiting is None:
visiting = set()
if isinstance(node, dict):
ref = node.get("$ref")
if isinstance(ref, str) and ref.startswith("#/$defs/"):
def_name = ref.removeprefix("#/$defs/")
if def_name in visiting:
return True
if def_name in defs:
visiting.add(def_name)
if _has_circular_refs(defs[def_name], defs, visiting):
return True
visiting.discard(def_name)
for value in node.values():
if _has_circular_refs(value, defs, visiting):
return True
elif isinstance(node, list):
for item in node:
if _has_circular_refs(item, defs, visiting):
return True
return False
def _break_circular_refs(
node: Any,
defs: dict[str, Any],
visiting: set[str],
) -> None:
"""Walk *node* in-place and replace circular ``$ref`` pointers with stubs."""
if isinstance(node, dict):
ref = node.get("$ref")
if isinstance(ref, str) and ref.startswith("#/$defs/"):
def_name = ref.removeprefix("#/$defs/")
if def_name in visiting:
# Circular - replace the *whole* node content with a stub.
node.clear()
node["type"] = "object"
return
if def_name in defs:
visiting.add(def_name)
_break_circular_refs(defs[def_name], defs, visiting)
visiting.discard(def_name)
for value in node.values():
_break_circular_refs(value, defs, visiting)
elif isinstance(node, list):
for item in node:
_break_circular_refs(item, defs, visiting)
def create_model_from_schema( # type: ignore[no-any-unimported]
json_schema: dict[str, Any],
*,
@@ -677,6 +772,10 @@ def create_model_from_schema( # type: ignore[no-any-unimported]
as nested objects, referenced definitions ($ref), arrays with typed items,
union types (anyOf/oneOf), and string formats.
Circular ``$ref`` chains (common in complex MCP tool schemas) are detected
and broken automatically so that deeply-nested or self-referential schemas
never trigger a ``RecursionError``.
Args:
json_schema: A dictionary representing the JSON schema.
root_schema: The root schema containing $defs. If not provided, the
@@ -712,7 +811,7 @@ def create_model_from_schema( # type: ignore[no-any-unimported]
>>> person.name
'John'
"""
json_schema = dict(jsonref.replace_refs(json_schema, proxies=False))
json_schema = _safe_replace_refs(json_schema)
effective_root = root_schema or json_schema
@@ -920,13 +1019,21 @@ def _json_schema_to_pydantic_type(
"""
ref = json_schema.get("$ref")
if ref:
ref_schema = _resolve_ref(ref, root_schema)
return _json_schema_to_pydantic_type(
ref_schema,
root_schema,
name_=name_,
enrich_descriptions=enrich_descriptions,
)
# Detect circular $ref chains - if we are already resolving this
# ref higher up the call stack, break the cycle by returning dict.
if ref in _resolving_refs:
return dict
_resolving_refs.add(ref)
try:
ref_schema = _resolve_ref(ref, root_schema)
return _json_schema_to_pydantic_type(
ref_schema,
root_schema,
name_=name_,
enrich_descriptions=enrich_descriptions,
)
finally:
_resolving_refs.discard(ref)
enum_values = json_schema.get("enum")
if enum_values:

View File

@@ -19,6 +19,9 @@ import pytest
from pydantic import BaseModel
from crewai.utilities.pydantic_schema_utils import (
_break_circular_refs,
_has_circular_refs,
_safe_replace_refs,
build_rich_field_description,
convert_oneof_to_anyof,
create_model_from_schema,
@@ -882,3 +885,333 @@ class TestEndToEndMCPSchema:
)
assert obj.filters.date_from == datetime.date(2025, 1, 1)
assert obj.filters.categories == ["news", "tech"]
# ---------------------------------------------------------------------------
# Circular $ref handling (issue #5474)
# ---------------------------------------------------------------------------
class TestCircularRefDetection:
    """Exercise ``_has_circular_refs`` on cyclic and acyclic ``$defs`` graphs."""

    def test_detects_direct_self_reference(self) -> None:
        # ``Node`` names itself as the item type of its own ``children`` array.
        node_def: dict[str, Any] = {
            "type": "object",
            "properties": {
                "children": {
                    "type": "array",
                    "items": {"$ref": "#/$defs/Node"},
                },
            },
        }
        document: dict[str, Any] = {
            "type": "object",
            "properties": {"child": {"$ref": "#/$defs/Node"}},
            "$defs": {"Node": node_def},
        }

        found = _has_circular_refs(document, document["$defs"])

        assert found is True

    def test_detects_indirect_circular_reference(self) -> None:
        # ``A`` points at ``B`` and ``B`` points straight back at ``A``.
        def_a: dict[str, Any] = {
            "type": "object",
            "properties": {"b": {"$ref": "#/$defs/B"}},
        }
        def_b: dict[str, Any] = {
            "type": "object",
            "properties": {"a": {"$ref": "#/$defs/A"}},
        }
        document: dict[str, Any] = {
            "type": "object",
            "properties": {"a": {"$ref": "#/$defs/A"}},
            "$defs": {"A": def_a, "B": def_b},
        }

        found = _has_circular_refs(document, document["$defs"])

        assert found is True

    def test_no_circular_ref(self) -> None:
        # ``Item`` is a leaf definition: it references nothing.
        item_def: dict[str, Any] = {
            "type": "object",
            "properties": {"name": {"type": "string"}},
        }
        document: dict[str, Any] = {
            "type": "object",
            "properties": {"item": {"$ref": "#/$defs/Item"}},
            "$defs": {"Item": item_def},
        }

        found = _has_circular_refs(document, document["$defs"])

        assert found is False
class TestBreakCircularRefs:
    """Exercise the in-place cycle breaking of ``_break_circular_refs``."""

    def test_breaks_direct_self_reference(self) -> None:
        node_def: dict[str, Any] = {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
                "children": {
                    "type": "array",
                    "items": {"$ref": "#/$defs/Node"},
                },
            },
        }
        document: dict[str, Any] = {
            "type": "object",
            "properties": {"child": {"$ref": "#/$defs/Node"}},
            "$defs": {"Node": node_def},
        }

        _break_circular_refs(document, document["$defs"], set())

        # The self-referential $ref inside Node's items must now be a stub.
        patched_items = document["$defs"]["Node"]["properties"]["children"]["items"]
        assert patched_items == {"type": "object"}
        assert "$ref" not in patched_items

    def test_preserves_non_circular_refs(self) -> None:
        item_def: dict[str, Any] = {
            "type": "object",
            "properties": {"name": {"type": "string"}},
        }
        document: dict[str, Any] = {
            "type": "object",
            "properties": {"item": {"$ref": "#/$defs/Item"}},
            "$defs": {"Item": item_def},
        }
        snapshot = deepcopy(document)

        _break_circular_refs(document, document["$defs"], set())

        # Nothing is circular here, so nothing may have been rewritten.
        assert document == snapshot
class TestSafeReplaceRefs:
    """Tests for _safe_replace_refs."""

    def test_resolves_non_circular_schema(self) -> None:
        schema: dict[str, Any] = {
            "type": "object",
            "properties": {"item": {"$ref": "#/$defs/Item"}},
            "$defs": {
                "Item": {
                    "type": "object",
                    "properties": {"id": {"type": "integer"}},
                },
            },
        }
        result = _safe_replace_refs(schema)
        assert "$ref" not in result.get("properties", {}).get("item", {})
        assert result["properties"]["item"]["type"] == "object"

    def test_handles_circular_schema_without_recursion_error(self) -> None:
        import json

        schema: dict[str, Any] = {
            "type": "object",
            "properties": {"root": {"$ref": "#/$defs/TreeNode"}},
            "$defs": {
                "TreeNode": {
                    "type": "object",
                    "properties": {
                        "name": {"type": "string"},
                        "children": {
                            "type": "array",
                            "items": {"$ref": "#/$defs/TreeNode"},
                        },
                    },
                },
            },
        }
        before = json.dumps(schema, sort_keys=True)

        # Must not raise RecursionError
        result = _safe_replace_refs(schema)
        assert isinstance(result, dict)

        # ``isinstance`` alone would also pass for an unresolved or cyclic
        # result. ``json.dumps`` raises ValueError on circular Python
        # containers, so a successful dump proves the output is finite; the
        # substring check proves every pointer was inlined or stubbed.
        serialized = json.dumps(result)
        assert "$ref" not in serialized

        # Cycle breaking works on a copy; the caller's schema stays intact.
        assert json.dumps(schema, sort_keys=True) == before
class TestResolveRefsCircular:
    """Check that ``resolve_refs`` stubs out circular references."""

    def test_circular_ref_does_not_recurse(self) -> None:
        node_def: dict[str, Any] = {
            "type": "object",
            "properties": {
                "child": {"$ref": "#/$defs/Node"},
            },
        }
        document: dict[str, Any] = {
            "type": "object",
            "properties": {"root": {"$ref": "#/$defs/Node"}},
            "$defs": {"Node": node_def},
        }

        flattened = resolve_refs(document)

        # Node -> Node(cycle): the inner reference collapses to the stub.
        inlined_root = flattened["properties"]["root"]
        assert inlined_root["properties"]["child"] == {"type": "object"}

    def test_indirect_circular_ref(self) -> None:
        def_a: dict[str, Any] = {
            "type": "object",
            "properties": {"b": {"$ref": "#/$defs/B"}},
        }
        def_b: dict[str, Any] = {
            "type": "object",
            "properties": {"a": {"$ref": "#/$defs/A"}},
        }
        document: dict[str, Any] = {
            "type": "object",
            "properties": {"a": {"$ref": "#/$defs/A"}},
            "$defs": {"A": def_a, "B": def_b},
        }

        flattened = resolve_refs(document)

        # A -> B -> A(cycle) => the second A should be a stub
        inlined_b = flattened["properties"]["a"]["properties"]["b"]
        assert inlined_b["properties"]["a"] == {"type": "object"}
class TestCreateModelCircularRef:
    """End-to-end checks of ``create_model_from_schema`` on circular schemas.

    Regression coverage for GitHub issue #5474, where MCP servers exposing
    self-referential JSON schemas caused
    ``RecursionError: maximum recursion depth exceeded``.
    """

    def test_direct_self_referential_schema(self) -> None:
        """A tree-like type whose children are of the same type."""

        def tree_shape() -> dict[str, Any]:
            # Fresh dict per call so the root and the definition never alias.
            return {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "children": {
                        "type": "array",
                        "items": {"$ref": "#/$defs/TreeNode"},
                    },
                },
                "required": ["name"],
            }

        document: dict[str, Any] = tree_shape()
        document["$defs"] = {"TreeNode": tree_shape()}

        model_cls = create_model_from_schema(document, model_name="TreeSchema")

        assert model_cls.__name__ == "TreeSchema"
        instance = model_cls(name="root")
        assert instance.name == "root"

    def test_indirect_circular_reference(self) -> None:
        """Mutually referencing types (A -> B -> A)."""
        node_a: dict[str, Any] = {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
                "linked": {"$ref": "#/$defs/NodeB"},
            },
            "required": ["name"],
        }
        node_b: dict[str, Any] = {
            "type": "object",
            "properties": {
                "value": {"type": "integer"},
                "back": {"$ref": "#/$defs/NodeA"},
            },
            "required": ["value"],
        }
        document: dict[str, Any] = {
            "type": "object",
            "properties": {"node": {"$ref": "#/$defs/NodeA"}},
            "required": ["node"],
            "$defs": {"NodeA": node_a, "NodeB": node_b},
        }

        model_cls = create_model_from_schema(document, model_name="MutualRef")

        instance = model_cls(node={"name": "hello", "linked": {"value": 42}})
        assert instance.node.name == "hello"

    def test_many_tools_with_complex_schemas(self) -> None:
        """Build models for 15 tool schemas in a row (issue #5474 setup)."""
        for index in range(15):
            options_schema: dict[str, Any] = {
                "type": "object",
                "properties": {
                    "limit": {"type": "integer"},
                    "filter": {"type": "string"},
                },
            }
            tool_schema: dict[str, Any] = {
                "type": "object",
                "properties": {
                    "query": {"type": "string"},
                    "options": options_schema,
                },
                "required": ["query"],
            }

            model_cls = create_model_from_schema(
                tool_schema, model_name=f"Tool{index}Schema"
            )

            instance = model_cls(query=f"test_{index}")
            assert instance.query == f"test_{index}"

    def test_circular_ref_with_enrich_descriptions(self) -> None:
        """A circular schema combined with ``enrich_descriptions=True``."""
        recursive_def: dict[str, Any] = {
            "type": "object",
            "properties": {
                "name": {"type": "string", "description": "Name"},
                "child": {"$ref": "#/$defs/Recursive"},
            },
        }
        document: dict[str, Any] = {
            "type": "object",
            "properties": {
                "name": {"type": "string", "description": "Node name"},
                "child": {"$ref": "#/$defs/Recursive"},
            },
            "required": ["name"],
            "$defs": {"Recursive": recursive_def},
        }

        model_cls = create_model_from_schema(
            document,
            model_name="EnrichedCircular",
            enrich_descriptions=True,
        )

        assert model_cls.__name__ == "EnrichedCircular"
        instance = model_cls(name="top")
        assert instance.name == "top"

    def test_deeply_nested_non_circular_still_works(self) -> None:
        """A three-level, cycle-free ``$ref`` chain still resolves fully."""
        level3: dict[str, Any] = {
            "type": "object",
            "properties": {"value": {"type": "string"}},
            "required": ["value"],
        }
        level2: dict[str, Any] = {
            "type": "object",
            "properties": {"l3": {"$ref": "#/$defs/Level3"}},
            "required": ["l3"],
        }
        level1: dict[str, Any] = {
            "type": "object",
            "properties": {"l2": {"$ref": "#/$defs/Level2"}},
            "required": ["l2"],
        }
        document: dict[str, Any] = {
            "type": "object",
            "properties": {"l1": {"$ref": "#/$defs/Level1"}},
            "required": ["l1"],
            "$defs": {"Level1": level1, "Level2": level2, "Level3": level3},
        }

        model_cls = create_model_from_schema(document, model_name="DeepChain")

        instance = model_cls(l1={"l2": {"l3": {"value": "deep"}}})
        assert instance.l1.l2.l3.value == "deep"