refactor: replace hardcoded denylist with dynamic BaseTool field exclusion in spec gen (#5347)

The spec generator previously used a hardcoded list of field names to
exclude from init_params_schema. Any new field or computed_field added
to BaseTool (like tool_type from 86ce54f) would silently leak into
tool.specs.json unless someone remembered to update that list.

Now _extract_init_params() dynamically computes BaseTool's fields at
import time via model_fields + model_computed_fields, so any future
additions to BaseTool are automatically excluded.

Fields from intermediate base classes (RagTool, BraveSearchToolBase,
SerpApiBaseTool) are correctly preserved since they're not on BaseTool.

TDD:
- RED: 3 new tests confirming BaseTool field leak, intermediate base
  preservation, and future-proofing — all failed before the fix
- GREEN: Dynamic allowlist applied — all 10 tests pass
- Regenerated tool.specs.json (tool_type removed from all tools)
This commit is contained in:
iris-clawd
2026-04-08 08:49:16 -07:00
committed by GitHub
parent 0e8ed75947
commit 1ae237a287
3 changed files with 212 additions and 708 deletions

View File

@@ -154,21 +154,19 @@ class ToolSpecExtractor:
return default_value
# Dynamically computed from BaseTool so that any future fields or
# computed_fields added to BaseTool are automatically excluded from
# the generated spec — no hardcoded denylist to maintain.
# ``package_dependencies`` is not a BaseTool field but is extracted
# into its own top-level key, so it's also excluded from init_params.
_BASE_TOOL_FIELDS: set[str] = (
set(BaseTool.model_fields)
| set(BaseTool.model_computed_fields)
| {"package_dependencies"}
)
@staticmethod
def _extract_init_params(tool_class: type[BaseTool]) -> dict[str, Any]:
ignored_init_params = [
"name",
"description",
"env_vars",
"args_schema",
"description_updated",
"cache_function",
"result_as_answer",
"max_usage_count",
"current_usage_count",
"package_dependencies",
]
json_schema = tool_class.model_json_schema(
schema_generator=SchemaGenerator, mode="serialization"
)
@@ -176,8 +174,14 @@ class ToolSpecExtractor:
json_schema["properties"] = {
key: value
for key, value in json_schema["properties"].items()
if key not in ignored_init_params
if key not in ToolSpecExtractor._BASE_TOOL_FIELDS
}
if "required" in json_schema:
json_schema["required"] = [
key
for key in json_schema["required"]
if key not in ToolSpecExtractor._BASE_TOOL_FIELDS
]
return json_schema
def save_to_json(self, output_path: str) -> None:

View File

@@ -45,6 +45,26 @@ class MockTool(BaseTool):
)
# --- Intermediate base class (like RagTool, BraveSearchToolBase) ---
class MockIntermediateBase(BaseTool):
"""Simulates an intermediate tool base class (e.g. RagTool, BraveSearchToolBase)."""
name: str = "Intermediate Base"
description: str = "An intermediate tool base"
shared_config: str = Field("default_config", description="Config from intermediate base")
def _run(self, query: str) -> str:
return query
class MockDerivedTool(MockIntermediateBase):
"""A tool inheriting from an intermediate base, like CodeDocsSearchTool(RagTool)."""
name: str = "Derived Tool"
description: str = "A tool that inherits from intermediate base"
derived_param: str = Field("derived_default", description="Param specific to derived tool")
@pytest.fixture
def extractor():
ext = ToolSpecExtractor()
@@ -169,6 +189,87 @@ def test_extract_package_dependencies(mock_tool_extractor):
]
def test_base_tool_fields_excluded_from_init_params(mock_tool_extractor):
"""BaseTool internal fields (including computed_field like tool_type) must
never appear in init_params_schema. Studio reads this schema to render
the tool config UI — internal fields confuse users."""
init_schema = mock_tool_extractor["init_params_schema"]
props = set(init_schema.get("properties", {}).keys())
required = set(init_schema.get("required", []))
# These are all BaseTool's own fields — none should leak
base_fields = {"name", "description", "env_vars", "args_schema",
"description_updated", "cache_function", "result_as_answer",
"max_usage_count", "current_usage_count", "tool_type",
"package_dependencies"}
leaked_props = base_fields & props
assert not leaked_props, (
f"BaseTool fields leaked into init_params_schema properties: {leaked_props}"
)
leaked_required = base_fields & required
assert not leaked_required, (
f"BaseTool fields leaked into init_params_schema required: {leaked_required}"
)
def test_intermediate_base_fields_preserved_for_derived_tool(extractor):
"""When a tool inherits from an intermediate base (e.g. RagTool),
the intermediate's fields should be included — only BaseTool's own
fields are excluded."""
with (
mock.patch(
"crewai_tools.generate_tool_specs.dir",
return_value=["MockDerivedTool"],
),
mock.patch(
"crewai_tools.generate_tool_specs.getattr",
return_value=MockDerivedTool,
),
):
extractor.extract_all_tools()
assert len(extractor.tools_spec) == 1
tool_info = extractor.tools_spec[0]
props = set(tool_info["init_params_schema"].get("properties", {}).keys())
# Intermediate base's field should be preserved
assert "shared_config" in props, (
"Intermediate base class fields should be preserved in init_params_schema"
)
# Derived tool's own field should be preserved
assert "derived_param" in props, (
"Derived tool's own fields should be preserved in init_params_schema"
)
# BaseTool internals should still be excluded
assert "tool_type" not in props
assert "cache_function" not in props
assert "result_as_answer" not in props
def test_future_base_tool_field_auto_excluded(extractor):
"""If a new field is added to BaseTool in the future, it should be
automatically excluded from spec generation without needing to update
the ignored list. This test verifies the allowlist approach works
by checking that ONLY non-BaseTool fields appear."""
with (
mock.patch("crewai_tools.generate_tool_specs.dir", return_value=["MockTool"]),
mock.patch("crewai_tools.generate_tool_specs.getattr", return_value=MockTool),
):
extractor.extract_all_tools()
tool_info = extractor.tools_spec[0]
props = set(tool_info["init_params_schema"].get("properties", {}).keys())
base_all = set(BaseTool.model_fields) | set(BaseTool.model_computed_fields)
leaked = base_all & props
assert not leaked, (
f"BaseTool fields should be auto-excluded but found: {leaked}. "
"The spec generator should dynamically compute BaseTool's fields "
"instead of using a hardcoded denylist."
)
def test_save_to_json(extractor, tmp_path):
extractor.tools_spec = [
{

File diff suppressed because it is too large Load Diff