diff --git a/lib/crewai/src/crewai/utilities/pydantic_schema_utils.py b/lib/crewai/src/crewai/utilities/pydantic_schema_utils.py index 4c69c9bf6..d2acfee16 100644 --- a/lib/crewai/src/crewai/utilities/pydantic_schema_utils.py +++ b/lib/crewai/src/crewai/utilities/pydantic_schema_utils.py @@ -91,6 +91,9 @@ def resolve_refs(schema: dict[str, Any]) -> dict[str, Any]: This is needed because Pydantic generates $ref-based schemas that some consumers (e.g. LLMs, tool frameworks) don't handle well. + Circular references are detected and replaced with a plain + ``{"type": "object"}`` stub to prevent infinite recursion. + Args: schema: JSON Schema dict that may contain "$refs" and "$defs". @@ -100,18 +103,23 @@ def resolve_refs(schema: dict[str, Any]) -> dict[str, Any]: defs = schema.get("$defs", {}) schema_copy = deepcopy(schema) - def _resolve(node: Any) -> Any: + def _resolve(node: Any, resolving: frozenset[str] = frozenset()) -> Any: if isinstance(node, dict): ref = node.get("$ref") if isinstance(ref, str) and ref.startswith("#/$defs/"): def_name = ref.replace("#/$defs/", "") + if def_name in resolving: + return {"type": "object"} if def_name in defs: - return _resolve(deepcopy(defs[def_name])) + return _resolve( + deepcopy(defs[def_name]), + resolving | {def_name}, + ) raise KeyError(f"Definition '{def_name}' not found in $defs.") - return {k: _resolve(v) for k, v in node.items()} + return {k: _resolve(v, resolving) for k, v in node.items()} if isinstance(node, list): - return [_resolve(i) for i in node] + return [_resolve(i, resolving) for i in node] return node @@ -658,6 +666,93 @@ def build_rich_field_description(prop_schema: dict[str, Any]) -> str: return ". ".join(parts) if parts else "" +# Module-level set of ``$ref`` strings currently being resolved higher up the +# call stack. Used by ``_json_schema_to_pydantic_type`` to detect circular +# ``$ref`` chains and fall back to ``dict``. NOTE: shared, not thread-local. 
+_resolving_refs: set[str] = set() + + +def _safe_replace_refs(json_schema: dict[str, Any]) -> dict[str, Any]: + """Resolve ``$ref`` pointers in *json_schema*, tolerating circular refs. + + ``jsonref.replace_refs(proxies=False)`` performs eager, recursive + inlining. When a definition refers back to itself (directly or + transitively) this blows the Python call stack and also produces + Python dicts with circular object references that break all + downstream recursive visitors. + + Strategy: always break circular ``$ref`` chains *before* handing the + schema to ``jsonref`` so the library never encounters a cycle. + """ + schema_copy = deepcopy(json_schema) + defs = schema_copy.get("$defs", {}) + + if defs and _has_circular_refs(schema_copy, defs): + _break_circular_refs(schema_copy, defs, set()) + + try: + return dict(jsonref.replace_refs(schema_copy, proxies=False)) + except RecursionError: + # Last resort - return the manually patched copy as-is. + return schema_copy + + +def _has_circular_refs( + node: Any, + defs: dict[str, Any], + visiting: set[str] | None = None, +) -> bool: + """Return ``True`` if *node* contains any circular ``$ref`` chain.""" + if visiting is None: + visiting = set() + + if isinstance(node, dict): + ref = node.get("$ref") + if isinstance(ref, str) and ref.startswith("#/$defs/"): + def_name = ref.removeprefix("#/$defs/") + if def_name in visiting: + return True + if def_name in defs: + visiting.add(def_name) + if _has_circular_refs(defs[def_name], defs, visiting): + return True + visiting.discard(def_name) + for value in node.values(): + if _has_circular_refs(value, defs, visiting): + return True + elif isinstance(node, list): + for item in node: + if _has_circular_refs(item, defs, visiting): + return True + return False + + +def _break_circular_refs( + node: Any, + defs: dict[str, Any], + visiting: set[str], +) -> None: + """Walk *node* in-place and replace circular ``$ref`` pointers with stubs.""" + if isinstance(node, dict): + ref = 
node.get("$ref") + if isinstance(ref, str) and ref.startswith("#/$defs/"): + def_name = ref.removeprefix("#/$defs/") + if def_name in visiting: + # Circular - replace the *whole* node content with a stub. + node.clear() + node["type"] = "object" + return + if def_name in defs: + visiting.add(def_name) + _break_circular_refs(defs[def_name], defs, visiting) + visiting.discard(def_name) + for value in node.values(): + _break_circular_refs(value, defs, visiting) + elif isinstance(node, list): + for item in node: + _break_circular_refs(item, defs, visiting) + + def create_model_from_schema( # type: ignore[no-any-unimported] json_schema: dict[str, Any], *, @@ -677,6 +772,10 @@ def create_model_from_schema( # type: ignore[no-any-unimported] as nested objects, referenced definitions ($ref), arrays with typed items, union types (anyOf/oneOf), and string formats. + Circular ``$ref`` chains (common in complex MCP tool schemas) are detected + and broken automatically so that deeply-nested or self-referential schemas + never trigger a ``RecursionError``. + Args: json_schema: A dictionary representing the JSON schema. root_schema: The root schema containing $defs. If not provided, the @@ -712,7 +811,7 @@ def create_model_from_schema( # type: ignore[no-any-unimported] >>> person.name 'John' """ - json_schema = dict(jsonref.replace_refs(json_schema, proxies=False)) + json_schema = _safe_replace_refs(json_schema) effective_root = root_schema or json_schema @@ -920,13 +1019,21 @@ def _json_schema_to_pydantic_type( """ ref = json_schema.get("$ref") if ref: - ref_schema = _resolve_ref(ref, root_schema) - return _json_schema_to_pydantic_type( - ref_schema, - root_schema, - name_=name_, - enrich_descriptions=enrich_descriptions, - ) + # Detect circular $ref chains - if we are already resolving this + # ref higher up the call stack, break the cycle by returning dict. 
+ if ref in _resolving_refs: + return dict + _resolving_refs.add(ref) + try: + ref_schema = _resolve_ref(ref, root_schema) + return _json_schema_to_pydantic_type( + ref_schema, + root_schema, + name_=name_, + enrich_descriptions=enrich_descriptions, + ) + finally: + _resolving_refs.discard(ref) enum_values = json_schema.get("enum") if enum_values: diff --git a/lib/crewai/tests/utilities/test_pydantic_schema_utils.py b/lib/crewai/tests/utilities/test_pydantic_schema_utils.py index 98a5e6aa5..c030edab2 100644 --- a/lib/crewai/tests/utilities/test_pydantic_schema_utils.py +++ b/lib/crewai/tests/utilities/test_pydantic_schema_utils.py @@ -19,6 +19,9 @@ import pytest from pydantic import BaseModel from crewai.utilities.pydantic_schema_utils import ( + _break_circular_refs, + _has_circular_refs, + _safe_replace_refs, build_rich_field_description, convert_oneof_to_anyof, create_model_from_schema, @@ -882,3 +885,333 @@ class TestEndToEndMCPSchema: ) assert obj.filters.date_from == datetime.date(2025, 1, 1) assert obj.filters.categories == ["news", "tech"] + + +# --------------------------------------------------------------------------- +# Circular $ref handling (issue #5474) +# --------------------------------------------------------------------------- + + +class TestCircularRefDetection: + """Tests for _has_circular_refs helper.""" + + def test_detects_direct_self_reference(self) -> None: + schema: dict[str, Any] = { + "type": "object", + "properties": {"child": {"$ref": "#/$defs/Node"}}, + "$defs": { + "Node": { + "type": "object", + "properties": { + "children": { + "type": "array", + "items": {"$ref": "#/$defs/Node"}, + }, + }, + }, + }, + } + assert _has_circular_refs(schema, schema["$defs"]) is True + + def test_detects_indirect_circular_reference(self) -> None: + schema: dict[str, Any] = { + "type": "object", + "properties": {"a": {"$ref": "#/$defs/A"}}, + "$defs": { + "A": { + "type": "object", + "properties": {"b": {"$ref": "#/$defs/B"}}, + }, + "B": { + "type": 
"object", + "properties": {"a": {"$ref": "#/$defs/A"}}, + }, + }, + } + assert _has_circular_refs(schema, schema["$defs"]) is True + + def test_no_circular_ref(self) -> None: + schema: dict[str, Any] = { + "type": "object", + "properties": {"item": {"$ref": "#/$defs/Item"}}, + "$defs": { + "Item": { + "type": "object", + "properties": {"name": {"type": "string"}}, + }, + }, + } + assert _has_circular_refs(schema, schema["$defs"]) is False + + +class TestBreakCircularRefs: + """Tests for _break_circular_refs helper.""" + + def test_breaks_direct_self_reference(self) -> None: + schema: dict[str, Any] = { + "type": "object", + "properties": {"child": {"$ref": "#/$defs/Node"}}, + "$defs": { + "Node": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "children": { + "type": "array", + "items": {"$ref": "#/$defs/Node"}, + }, + }, + }, + }, + } + _break_circular_refs(schema, schema["$defs"], set()) + # The self-referential $ref inside Node's items should be replaced + items = schema["$defs"]["Node"]["properties"]["children"]["items"] + assert items == {"type": "object"} + assert "$ref" not in items + + def test_preserves_non_circular_refs(self) -> None: + schema: dict[str, Any] = { + "type": "object", + "properties": {"item": {"$ref": "#/$defs/Item"}}, + "$defs": { + "Item": { + "type": "object", + "properties": {"name": {"type": "string"}}, + }, + }, + } + original = deepcopy(schema) + _break_circular_refs(schema, schema["$defs"], set()) + # Non-circular schema should be unchanged + assert schema == original + + +class TestSafeReplaceRefs: + """Tests for _safe_replace_refs.""" + + def test_resolves_non_circular_schema(self) -> None: + schema: dict[str, Any] = { + "type": "object", + "properties": {"item": {"$ref": "#/$defs/Item"}}, + "$defs": { + "Item": { + "type": "object", + "properties": {"id": {"type": "integer"}}, + }, + }, + } + result = _safe_replace_refs(schema) + assert "$ref" not in result.get("properties", {}).get("item", {}) + assert 
result["properties"]["item"]["type"] == "object" + + def test_handles_circular_schema_without_recursion_error(self) -> None: + schema: dict[str, Any] = { + "type": "object", + "properties": {"root": {"$ref": "#/$defs/TreeNode"}}, + "$defs": { + "TreeNode": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "children": { + "type": "array", + "items": {"$ref": "#/$defs/TreeNode"}, + }, + }, + }, + }, + } + # Must not raise RecursionError + result = _safe_replace_refs(schema) + assert isinstance(result, dict) + + +class TestResolveRefsCircular: + """Tests that resolve_refs handles circular references.""" + + def test_circular_ref_does_not_recurse(self) -> None: + schema: dict[str, Any] = { + "type": "object", + "properties": {"root": {"$ref": "#/$defs/Node"}}, + "$defs": { + "Node": { + "type": "object", + "properties": { + "child": {"$ref": "#/$defs/Node"}, + }, + }, + }, + } + resolved = resolve_refs(schema) + # The circular ref should become {"type": "object"} stub + child = resolved["properties"]["root"]["properties"]["child"] + assert child == {"type": "object"} + + def test_indirect_circular_ref(self) -> None: + schema: dict[str, Any] = { + "type": "object", + "properties": {"a": {"$ref": "#/$defs/A"}}, + "$defs": { + "A": { + "type": "object", + "properties": {"b": {"$ref": "#/$defs/B"}}, + }, + "B": { + "type": "object", + "properties": {"a": {"$ref": "#/$defs/A"}}, + }, + }, + } + resolved = resolve_refs(schema) + # A -> B -> A(cycle) => the second A should be a stub + b_schema = resolved["properties"]["a"]["properties"]["b"] + assert b_schema["properties"]["a"] == {"type": "object"} + + +class TestCreateModelCircularRef: + """End-to-end tests for create_model_from_schema with circular $ref schemas. + + Regression tests for GitHub issue #5474: MCP servers with >10 tools + that expose self-referential JSON schemas caused + ``RecursionError: maximum recursion depth exceeded``. 
+ """ + + def test_direct_self_referential_schema(self) -> None: + """A type that references itself (tree-like structure).""" + schema: dict[str, Any] = { + "type": "object", + "properties": { + "name": {"type": "string"}, + "children": { + "type": "array", + "items": {"$ref": "#/$defs/TreeNode"}, + }, + }, + "required": ["name"], + "$defs": { + "TreeNode": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "children": { + "type": "array", + "items": {"$ref": "#/$defs/TreeNode"}, + }, + }, + "required": ["name"], + }, + }, + } + Model = create_model_from_schema(schema, model_name="TreeSchema") + assert Model.__name__ == "TreeSchema" + obj = Model(name="root") + assert obj.name == "root" + + def test_indirect_circular_reference(self) -> None: + """Two types that reference each other (A -> B -> A).""" + schema: dict[str, Any] = { + "type": "object", + "properties": {"node": {"$ref": "#/$defs/NodeA"}}, + "required": ["node"], + "$defs": { + "NodeA": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "linked": {"$ref": "#/$defs/NodeB"}, + }, + "required": ["name"], + }, + "NodeB": { + "type": "object", + "properties": { + "value": {"type": "integer"}, + "back": {"$ref": "#/$defs/NodeA"}, + }, + "required": ["value"], + }, + }, + } + Model = create_model_from_schema(schema, model_name="MutualRef") + obj = Model(node={"name": "hello", "linked": {"value": 42}}) + assert obj.node.name == "hello" + + def test_many_tools_with_complex_schemas(self) -> None: + """Simulate an MCP server exposing >10 tools (issue #5474 trigger).""" + for i in range(15): + tool_schema: dict[str, Any] = { + "type": "object", + "properties": { + "query": {"type": "string"}, + "options": { + "type": "object", + "properties": { + "limit": {"type": "integer"}, + "filter": {"type": "string"}, + }, + }, + }, + "required": ["query"], + } + Model = create_model_from_schema( + tool_schema, model_name=f"Tool{i}Schema" + ) + obj = Model(query=f"test_{i}") + assert 
obj.query == f"test_{i}" + + def test_circular_ref_with_enrich_descriptions(self) -> None: + """Circular schema + enrich_descriptions should not blow up.""" + schema: dict[str, Any] = { + "type": "object", + "properties": { + "name": {"type": "string", "description": "Node name"}, + "child": {"$ref": "#/$defs/Recursive"}, + }, + "required": ["name"], + "$defs": { + "Recursive": { + "type": "object", + "properties": { + "name": {"type": "string", "description": "Name"}, + "child": {"$ref": "#/$defs/Recursive"}, + }, + }, + }, + } + Model = create_model_from_schema( + schema, + model_name="EnrichedCircular", + enrich_descriptions=True, + ) + assert Model.__name__ == "EnrichedCircular" + obj = Model(name="top") + assert obj.name == "top" + + def test_deeply_nested_non_circular_still_works(self) -> None: + """A deep but non-circular chain of $refs should still resolve.""" + schema: dict[str, Any] = { + "type": "object", + "properties": {"l1": {"$ref": "#/$defs/Level1"}}, + "required": ["l1"], + "$defs": { + "Level1": { + "type": "object", + "properties": {"l2": {"$ref": "#/$defs/Level2"}}, + "required": ["l2"], + }, + "Level2": { + "type": "object", + "properties": {"l3": {"$ref": "#/$defs/Level3"}}, + "required": ["l3"], + }, + "Level3": { + "type": "object", + "properties": {"value": {"type": "string"}}, + "required": ["value"], + }, + }, + } + Model = create_model_from_schema(schema, model_name="DeepChain") + obj = Model(l1={"l2": {"l3": {"value": "deep"}}}) + assert obj.l1.l2.l3.value == "deep"