magicciv/tools/validate-game-data.py
Natalie fe5db2d25f feat(@projects/@magic-civilization): implement tech-gated observation recording
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-05-13 16:21:35 -07:00

994 lines
44 KiB
Python

#!/usr/bin/env python3
"""Validate Age of Dwarves game pack JSON files against their schemas.
Schemas live in: public/games/age-of-dwarves/data/schemas/*.schema.json
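Data sources:
- Shared split dirs (canonical): public/resources/units/, public/resources/buildings/,
  public/resources/techs/
- Game-pack split dirs: data/terrain/, plus data/units/, data/buildings/ and
  data/techs/, which are validated as well whenever they exist
- Single files: data/races.json, data/ai_personalities.json
- Resources: public/resources/wilds/wilds.json
- Manifested: data/improvements/ manifest references public/resources/improvements/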
Usage:
python3 tools/validate-game-data.py [--root /path/to/project] [--verbose] [--self-test]
Exit code 0 = all pass, 1 = failures found.
"""
import argparse
import json
import sys
from pathlib import Path
try:
from jsonschema import Draft202012Validator, ValidationError
HAS_JSONSCHEMA = True
except ImportError:
HAS_JSONSCHEMA = False
def load_json(path: Path):
    """Parse the JSON document stored at *path*.

    The file is decoded as UTF-8 rather than with the platform's locale
    encoding, so non-ASCII game text loads identically on every machine.

    Args:
        path: File to read.

    Returns:
        The decoded JSON value on success. If the text is not valid JSON the
        result is instead the 2-tuple ``(None, message)``. The two shapes
        differ, so callers that want a uniform ``(data, error)`` pair should
        use ``load_json_safe``.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as exc:
        return None, str(exc)
def load_json_safe(path: Path):
    """Load JSON from *path* without ever raising for a bad or missing file.

    Args:
        path: File to read; decoded as UTF-8.

    Returns:
        ``(data, None)`` on success, or ``(None, message)`` when the file is
        missing, unreadable, not valid UTF-8, or not valid JSON. Callers only
        test the second element, so an unreadable file is reported as an
        ordinary validation failure instead of aborting the whole run with a
        traceback.
    """
    try:
        text = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        return None, f"cannot read file: {exc}"
    try:
        return json.loads(text), None
    except json.JSONDecodeError as exc:
        return None, str(exc)
class GameDataValidator:
    """Validates the game pack's JSON data and tallies pass/fail results."""

    def __init__(self, root: Path, verbose: bool = False):
        """Derive the data locations from *root* and reset the counters.

        Args:
            root: Project root; every data path is resolved beneath it.
            verbose: When true, each passing check is printed as well.
        """
        public_dir = root / "public"
        self.root = root
        self.verbose = verbose
        # Game-pack specific data and its schemas.
        self.game_data = public_dir / "games" / "age-of-dwarves" / "data"
        self.schema_dir = self.game_data / "schemas"
        # Shared resource tree.
        self.resources = public_dir / "resources"
        # Running tallies plus the formatted failure messages.
        self.passed = 0
        self.failed = 0
        self.errors: list[str] = []
def _ok(self, label: str):
    """Count one passing check; echo it only when verbose output is on."""
    self.passed += 1
    if not self.verbose:
        return
    print(f" PASS {label}")
def _fail(self, label: str, reason: str):
    """Count one failing check, remember its message, and print it at once."""
    line = f"FAIL {label}: {reason}"
    self.failed += 1
    self.errors.append(line)
    print(f" {line}")
def _load_schema(self, name: str):
    """Load ``<name>.schema.json`` from the schema directory.

    Returns the parsed schema, or ``None`` after recording a failure when
    the loader reports an error.
    """
    schema_path = self.schema_dir / f"{name}.schema.json"
    schema, problem = load_json_safe(schema_path)
    if not problem:
        return schema
    self._fail(f"schema/{name}", f"parse error: {problem}")
    return None
def _validate_entry(self, schema, entry: dict, label: str):
    """Check *entry* against *schema* and record the outcome under *label*.

    Does nothing at all when the jsonschema package is unavailable.
    """
    if not HAS_JSONSCHEMA:
        return
    problems = list(Draft202012Validator(schema).iter_errors(entry))
    if not problems:
        self._ok(label)
        return
    # Keep the report readable: at most two violations per entry are recorded.
    for problem in problems[:2]:
        location = ".".join(map(str, problem.absolute_path)) or "(root)"
        self._fail(label, f"{location}: {problem.message}")
def _collect_entries_from_file(self, path: Path) -> list[tuple[str, dict]]:
    """Return ``(label, entry)`` pairs for every entry found in one JSON file.

    Recognised shapes, tried in this order:
      1. a top-level array of objects;
      2. a single object carrying a string ``id``;
      3. a keyed collection whose object values have an ``id`` or ``name``;
      4. a wrapper object, of which only the first array value is used.

    A load error is recorded as a failure and yields an empty list.
    """
    payload, problem = load_json_safe(path)
    rel = path.relative_to(self.root)
    if problem:
        self._fail(str(rel), f"parse error: {problem}")
        return []

    def indexed(items):
        # Non-object members are skipped but still consume their index.
        return [
            (f"{rel}[{idx}]", item)
            for idx, item in enumerate(items)
            if isinstance(item, dict)
        ]

    if isinstance(payload, list):
        return indexed(payload)
    if not isinstance(payload, dict):
        return []

    # Top-level dict with a string "id" = single entry.
    if isinstance(payload.get("id"), str):
        return [(str(rel), payload)]

    # Keyed collection (ai_personalities shape: {clan_id: {id, name, ...}}).
    keyed = [
        (f"{rel}/{key}", val)
        for key, val in payload.items()
        if isinstance(val, dict) and ("id" in val or "name" in val)
    ]
    if keyed:
        return keyed

    # Wrapped array: {"races": [...], "terrains": [...], ...}.
    for val in payload.values():
        if isinstance(val, list):
            return indexed(val)
    return []
# ── Category validators ───────────────────────────────────────────
def validate_split_dir(self, category_label: str, dir_path: Path, schema_name: str):
    """Validate every data file of a one-file-per-entity directory.

    Schema files, ``manifest.json`` and ``building_categories.json`` are not
    treated as data. Nothing is validated when the schema cannot be loaded.
    """
    schema = self._load_schema(schema_name)
    if schema is None:
        return

    not_data = ("manifest.json", "building_categories.json")
    data_files = []
    for candidate in dir_path.glob("*.json"):
        if candidate.name.endswith(".schema.json") or candidate.name in not_data:
            continue
        data_files.append(candidate)
    data_files.sort()

    if not data_files:
        print(f" (no files in {dir_path.relative_to(self.root)})")
        return

    print(f"\n {category_label} ({len(data_files)} files)")
    for data_file in data_files:
        for label, entry in self._collect_entries_from_file(data_file):
            self._validate_entry(schema, entry, label)
def validate_single_file(self, label: str, path: Path, schema_name: str, wrap_key: str | None = None):
    """Validate each entry of one multi-entry JSON file against a schema.

    Args:
        label: Heading printed for the file and used for file-level failures.
        path: The JSON file to validate.
        schema_name: Schema base name (without ``.schema.json``).
        wrap_key: Optional top-level key whose value holds the entries,
            e.g. ``{"races": [...]}``.
    """
    schema = self._load_schema(schema_name)
    if schema is None:
        return
    document, problem = load_json_safe(path)
    if problem:
        self._fail(label, f"parse error: {problem}")
        return

    rel = path.relative_to(self.root)
    print(f"\n {label}")

    # Unwrap a top-level key if given and present.
    container = document
    if wrap_key and isinstance(document, dict) and wrap_key in document:
        container = document[wrap_key]

    if isinstance(container, list):
        labelled = [(f"{rel}[{idx}]", item) for idx, item in enumerate(container)]
    elif isinstance(container, dict):
        # Dict-of-dicts (ai_personalities shape).
        labelled = [(f"{rel}/{key}", item) for key, item in container.items()]
    else:
        self._fail(label, "unexpected top-level type")
        return

    for entry_label, item in labelled:
        # Members that are not objects are skipped, not reported.
        if isinstance(item, dict):
            self._validate_entry(schema, item, entry_label)
def validate_wilds(self):
    """Validate the single wilds configuration object against its schema."""
    schema = self._load_schema("wilds")
    if schema is None:
        return
    wilds_file = self.resources / "wilds" / "wilds.json"
    document, problem = load_json_safe(wilds_file)
    if problem:
        self._fail("wilds.json", f"parse error: {problem}")
        return

    rel = str(wilds_file.relative_to(self.root))
    print("\n wilds")

    # wilds.json shape: {"wilds": { ... single config object ... }}; a bare
    # config object without the wrapper is accepted too.
    config = document
    if isinstance(document, dict):
        config = document.get("wilds", document)

    if not isinstance(config, dict):
        self._fail(rel, "unexpected shape")
        return
    self._validate_entry(schema, config, rel)
# ── p2-44a: promotions.json shape ─────────────────────────────────
# `promotions.json` is a config-shaped file consumed by DataLoader's raw
# path: top-level `trees` dict (keyed by combat-class tree id), an
# `xp_thresholds` array, and a `heal_on_promote_percent` integer. The
# AI promotion picker (`mc-ai`) and the human modal (`promotion_picker.gd`)
# both walk `trees`. Drift in this shape silently breaks the entire
# promotion path — no `unit_promoted` events fire — so we lock it down.
REQUIRED_PROMOTION_TREES: tuple[str, ...] = ("melee", "ranged", "siege")


def validate_promotions(self):
    """Check the structural shape of promotions.json.

    Verifies that the three top-level keys are present, that every required
    combat-class tree exists with a ``levels`` array, and that
    ``xp_thresholds`` is a non-empty array of numbers.
    """
    promo_file = self.resources / "promotions" / "promotions.json"
    if not promo_file.exists():
        self._fail("promotions.json", f"missing at {promo_file.relative_to(self.root)}")
        return
    document, problem = load_json_safe(promo_file)
    if problem:
        self._fail("promotions.json", f"parse error: {problem}")
        return

    rel = str(promo_file.relative_to(self.root))
    print("\n promotions.json")
    if not isinstance(document, dict):
        self._fail(rel, "top-level must be a JSON object")
        return

    for required_key in ("trees", "xp_thresholds", "heal_on_promote_percent"):
        if required_key in document:
            self._ok(f"{rel}/{required_key}")
        else:
            self._fail(rel, f"missing required key '{required_key}'")

    trees = document.get("trees")
    if not isinstance(trees, dict):
        self._fail(f"{rel}/trees", "must be an object keyed by tree id")
        return

    for tree_id in self.REQUIRED_PROMOTION_TREES:
        if tree_id not in trees:
            self._fail(f"{rel}/trees", f"missing required combat-class tree '{tree_id}'")
            continue
        tree = trees[tree_id]
        if isinstance(tree, dict) and isinstance(tree.get("levels"), list):
            self._ok(f"{rel}/trees/{tree_id}")
        else:
            self._fail(f"{rel}/trees/{tree_id}", "tree must have a 'levels' array")

    thresholds = document.get("xp_thresholds")
    well_formed = (
        isinstance(thresholds, list)
        and bool(thresholds)
        and all(isinstance(step, (int, float)) for step in thresholds)
    )
    if well_formed:
        self._ok(f"{rel}/xp_thresholds[shape]")
    else:
        self._fail(f"{rel}/xp_thresholds", "must be a non-empty array of numbers")
# ── p1-55: TechDomain enum membership ────────────────────────────
# The canonical 10-value list mirrors `mc_core::TechDomain::ALL`. If
# the Rust enum gains/loses a variant, update this list AND the test
# in `mc-tech/src/web.rs::all_authored_techs_have_valid_domain`.
TECH_DOMAINS: tuple[str, ...] = (
    "Military", "Economy", "Industry", "Agriculture", "Governance",
    "Culture", "Science", "Exploration", "Engineering", "Medicine",
)


def validate_tech_domains(self):
    """Require every tech under the shared ``techs`` directory to declare a
    `domain` that is a member of ``TECH_DOMAINS``.

    Only ``<resources>/techs`` is inspected; the game-pack ``techs``
    directory is deliberately not checked here. Does nothing when the
    directory is absent.

    Passing entries are recorded through ``_ok`` under ``<label>.domain`` so
    they are counted and, in verbose mode, printed like every other check.
    """
    techs_dir = self.resources / "techs"
    if not techs_dir.exists():
        return
    print(f"\n tech domain enum membership ({len(self.TECH_DOMAINS)} canonical values)")
    valid = set(self.TECH_DOMAINS)
    for tech_file in sorted(techs_dir.glob("*.json")):
        if tech_file.name == "manifest.json" or tech_file.name.endswith(".schema.json"):
            continue
        for label, entry in self._collect_entries_from_file(tech_file):
            domain = entry.get("domain")
            if domain is None:
                self._fail(label, "tech missing required `domain` field")
            # The isinstance test runs first so an unhashable value (list or
            # object) never reaches the set membership check.
            elif not isinstance(domain, str) or domain not in valid:
                self._fail(
                    label,
                    f"tech.domain={domain!r} not in canonical TechDomain enum {sorted(valid)}",
                )
            else:
                self._ok(f"{label}.domain")
def validate_improvements(self):
    """Schema-check each improvement file under the shared resources tree
    (``<resources>/improvements``, not the game data directory)."""
    schema = self._load_schema("improvement")
    if schema is None:
        return

    not_data = ("manifest.json", "improvements.json", "registry.md")
    data_files = []
    for candidate in (self.resources / "improvements").glob("*.json"):
        if candidate.name.endswith(".schema.json") or candidate.name in not_data:
            continue
        data_files.append(candidate)
    data_files.sort()

    print(f"\n improvements ({len(data_files)} files)")
    for data_file in data_files:
        for label, entry in self._collect_entries_from_file(data_file):
            self._validate_entry(schema, entry, label)
def validate_specialists(self):
    """Validate specialists under ``<resources>/specialists``.

    Every entry is schema-checked and must additionally carry a `category`
    matching the mc_core::WorkerCategory enum
    (sustenance | construction | wealth). A `category` of any non-string
    type is reported as an invalid value rather than crashing the run.
    """
    schema = self._load_schema("specialist")
    if schema is None:
        return
    spec_dir = self.resources / "specialists"
    if not spec_dir.exists():
        print("\n public/resources/specialists/ (not present — skipping)")
        return
    files = sorted(
        f for f in spec_dir.glob("*.json")
        if not f.name.endswith(".schema.json")
        and f.name not in ("manifest.json", "registry.md")
    )
    print(f"\n specialists ({len(files)} files)")
    valid_categories = {"sustenance", "construction", "wealth"}
    for f in files:
        for label, entry in self._collect_entries_from_file(f):
            self._validate_entry(schema, entry, label)
            cat = entry.get("category")
            cat_label = f"{label}.category"
            if cat is None:
                self._fail(cat_label, "specialist missing required `category`")
            # Type check first: a list/object value is unhashable and would
            # raise TypeError inside the set membership test.
            elif not isinstance(cat, str) or cat not in valid_categories:
                self._fail(
                    cat_label,
                    f"category={cat!r} not in WorkerCategory {sorted(valid_categories)}",
                )
            else:
                self._ok(cat_label)
def _load_id_set_from_split_dir(self, dir_path: Path) -> set[str]:
    """Collect the string ``id`` of every JSON entry in a split directory.

    ``manifest.json``, ``building_categories.json`` and schema files are
    skipped. Ids that are not strings are ignored: they cannot match a
    string cross-reference, and unhashable ones (lists, objects) would make
    ``set.add`` raise.
    """
    ids: set[str] = set()
    for f in dir_path.glob("*.json"):
        if f.name in ("manifest.json", "building_categories.json") or f.name.endswith(".schema.json"):
            continue
        for _label, entry in self._collect_entries_from_file(f):
            entry_id = entry.get("id")
            if isinstance(entry_id, str):
                ids.add(entry_id)
    return ids
def _load_resources(self) -> dict[str, dict] | None:
    """Build the deposit registry, keyed by deposit id, from the
    ``deposits`` directory of the shared resources tree.

    Deposits with scope == "game_2" are left out: they gate magic
    units/buildings that don't exist in Game 1 and would produce false
    cross-reference failures. Files that fail to load, are not objects, or
    lack an ``id`` are skipped silently.

    Returns None, after printing a notice, when the directory is absent.
    """
    deposits_dir = self.resources / "deposits"
    if not deposits_dir.exists():
        print("\n public/resources/deposits/ (not present — skipping cross-reference pass)")
        return None

    not_deposits = (
        "deposits.schema.json",
        "manifest.json",
        "registry.md",
        "deposit_categories.json",
    )
    registry: dict[str, dict] = {}
    for deposit_file in sorted(deposits_dir.glob("*.json")):
        if deposit_file.name in not_deposits:
            continue
        deposit, problem = load_json_safe(deposit_file)
        if problem or not isinstance(deposit, dict) or "id" not in deposit:
            continue
        if deposit.get("scope") != "game_2":
            registry[deposit["id"]] = deposit
    return registry
def _load_biomes(self) -> list[dict] | None:
    """Return the biome list from the game pack's biomes.json.

    Accepts either a bare array or an object wrapping it under ``biomes``.
    Returns None when the file is absent (a notice is printed) or cannot be
    loaded (a failure is recorded).
    """
    biomes_file = self.game_data / "biomes.json"
    if not biomes_file.exists():
        print("\n biomes.json (not present — skipping biome cross-reference pass)")
        return None
    payload, problem = load_json_safe(biomes_file)
    if problem:
        self._fail("biomes.json", f"parse error: {problem}")
        return None
    if isinstance(payload, list):
        return payload
    return payload.get("biomes", [])
def validate_biomes(self):
    """Schema-check biomes.json when both the file and its schema are available."""
    biomes_file = self.game_data / "biomes.json"
    # The schema is only probed once the file is known to exist.
    if biomes_file.exists() and self._load_schema("biome") is not None:
        self.validate_single_file("biomes.json", biomes_file, "biome", wrap_key="biomes")
def _load_concept_ids(self) -> set[str] | None:
    """Gather every ``id`` listed in the shared resources.json.

    The file is expected to map category names to arrays of entries; values
    that are not arrays, and members without an ``id``, are ignored.
    Returns None when the file is missing or cannot be loaded.
    """
    registry_file = self.resources / "resources.json"
    if not registry_file.exists():
        return None
    registry, problem = load_json_safe(registry_file)
    if problem:
        return None

    concept_ids: set[str] = set()
    if not isinstance(registry, dict):
        return concept_ids
    for group in registry.values():
        if not isinstance(group, list):
            continue
        for member in group:
            if isinstance(member, dict) and "id" in member:
                concept_ids.add(member["id"])
    return concept_ids
def validate_deposit_concept_refs(self):
    """Check each deposit's ``concept_resource`` against resources.json.

    The field must be present on every deposit. A game_2-scoped deposit may
    hold any value (including null); any other deposit needs a non-null
    value that names a known concept id.
    """
    concept_ids = self._load_concept_ids()
    if concept_ids is None:
        print("\n public/resources/resources.json (not present — skipping concept_resource pass)")
        return
    deposits_dir = self.resources / "deposits"
    if not deposits_dir.exists():
        return

    ignored = {
        "deposit_categories.json", "deposits.schema.json", "registry.md",
        "magical.json", "marine.json", "mineral.json", "organic.json",
    }
    deposit_files = sorted(p for p in deposits_dir.glob("*.json") if p.name not in ignored)
    print(f"\n deposit concept_resource refs ({len(deposit_files)} files)")

    for deposit_file in deposit_files:
        deposit, problem = load_json_safe(deposit_file)
        if problem or not isinstance(deposit, dict):
            continue
        label = f"deposits/{deposit.get('id', deposit_file.stem)}.concept_resource"
        # The string sentinel tells an absent field apart from an explicit null.
        concept = deposit.get("concept_resource", "__missing__")
        if concept == "__missing__":
            self._fail(label, "field absent — add concept_resource")
            continue
        if deposit.get("scope", "game_1") == "game_2":
            self._ok(label)  # null allowed for game_2 deposits
            continue
        if concept is None:
            self._fail(label, "null concept_resource on a game_1 deposit")
            continue
        if concept in concept_ids:
            self._ok(label)
        else:
            self._fail(label, f"'{concept}' not found in resources.json")
def validate_guide_data(self):
    """Validate the four guide-consumed JSON files (p2-32).

    Two files wrap a list whose items are validated one by one; the other
    two are validated as whole documents. A file is skipped when its schema
    cannot be loaded.
    """
    print("\n guide-data enums")

    # (schema name, file name, key holding the list of items)
    item_lists = (
        ("homepage-features", "homepage-features.json", "features"),
        ("map-topology", "map-topologies.json", "topologies"),
    )
    for schema_name, file_name, list_key in item_lists:
        schema = self._load_schema(schema_name)
        if schema is None:
            continue
        path = self.game_data / file_name
        document, problem = load_json_safe(path)
        if problem:
            self._fail(file_name, f"parse error: {problem}")
            continue
        rel = path.relative_to(self.root)
        for idx, item in enumerate(document.get(list_key, [])):
            self._validate_entry(schema, item, f"{rel}[{list_key}][{idx}]")

    # (schema name, path parts below the game data dir) — whole-file validation
    whole_files = (
        ("episode-systems", ("episodes", "ep1-systems.json")),
        ("shipping-roadmap", ("shipping-roadmap.json",)),
    )
    for schema_name, parts in whole_files:
        schema = self._load_schema(schema_name)
        if schema is None:
            continue
        path = self.game_data.joinpath(*parts)
        document, problem = load_json_safe(path)
        if problem:
            self._fail("/".join(parts), f"parse error: {problem}")
            continue
        self._validate_entry(schema, document, str(path.relative_to(self.root)))
def validate_resources_kind(self):
    """Require a valid `kind` on the resources listed in resources.json.

    Entries under the ``bonus``, ``luxury`` and ``strategic`` categories
    must declare `kind` ∈ {raw, processed} (objective p2-57a; mirrors
    `mc_core::ResourceKind`). The check is skipped silently when the file is
    absent, unloadable, or not an object.

    A null category list is treated as empty, and a `kind` of any non-string
    type is reported as an invalid value rather than raising.
    """
    path = self.resources / "resources.json"
    if not path.exists():
        return
    data, err = load_json_safe(path)
    if err or not isinstance(data, dict):
        return
    valid_kinds = {"raw", "processed"}
    print("\n resources.json kind enum membership")
    for cat in ("bonus", "luxury", "strategic"):
        # `or []` keeps an explicit JSON null from breaking the iteration.
        for entry in data.get(cat) or []:
            if not isinstance(entry, dict):
                continue
            rid = entry.get("id", "?")
            kind = entry.get("kind")
            label = f"resources/{rid}.kind"
            if kind is None:
                self._fail(label, "resource missing required `kind` field")
            # Type check first: an unhashable value would raise TypeError
            # inside the set membership test.
            elif not isinstance(kind, str) or kind not in valid_kinds:
                self._fail(
                    label,
                    f"kind={kind!r} not in canonical ResourceKind {sorted(valid_kinds)}",
                )
            else:
                self._ok(label)
def validate_recipes(self):
    """p2-57: cross-reference recipes.json against known buildings and resources.

    Building ids come from the shared and game-pack ``buildings``
    directories. Resource ids are the union of the bonus/luxury/strategic
    entries of resources.json and the ``processed`` entries of
    typed-resources/processed.json. Any unknown building, undeclared
    resource, or quantity that is not an int >= 1 is a failure. Does nothing
    when recipes.json is absent.
    """
    recipes_file = self.resources / "recipes" / "recipes.json"
    if not recipes_file.exists():
        return
    document, problem = load_json_safe(recipes_file)
    if problem or not isinstance(document, dict):
        self._fail("recipes/recipes.json", f"parse error: {problem or 'wrong shape'}")
        return

    # Known building ids.
    shared_buildings = self.resources / "buildings"
    if shared_buildings.exists():
        building_ids = self._load_id_set_from_split_dir(shared_buildings)
    else:
        building_ids = set()
    pack_buildings = self.game_data / "buildings"
    if pack_buildings.exists():
        building_ids |= self._load_id_set_from_split_dir(pack_buildings)

    # Known resource ids: raws from resources.json + processed from
    # typed-resources/processed.json.
    resource_ids: set[str] = set()

    def remember_ids(entries):
        for item in entries:
            if isinstance(item, dict) and "id" in item:
                resource_ids.add(item["id"])

    raw_file = self.resources / "resources.json"
    if raw_file.exists():
        raw_doc, _ = load_json_safe(raw_file)
        if isinstance(raw_doc, dict):
            for category in ("bonus", "luxury", "strategic"):
                remember_ids(raw_doc.get(category, []))
    processed_file = self.resources / "typed-resources" / "processed.json"
    if processed_file.exists():
        processed_doc, _ = load_json_safe(processed_file)
        if isinstance(processed_doc, dict):
            remember_ids(processed_doc.get("processed", []))

    print(
        f"\n recipe cross-refs ({len(building_ids)} buildings, "
        f"{len(resource_ids)} resource ids)"
    )

    recipes = document.get("recipes", [])
    if not isinstance(recipes, list):
        self._fail("recipes/recipes.json", "`recipes` must be a list")
        return

    for idx, recipe in enumerate(recipes):
        if not isinstance(recipe, dict):
            self._fail(f"recipes[{idx}]", f"must be object, got {type(recipe).__name__}")
            continue

        bid = recipe.get("building_id")
        shown = bid or f"<idx {idx}>"
        label = f"recipes/{shown}"
        if not isinstance(bid, str):
            self._fail(label, "missing/non-string building_id")
            continue
        if bid in building_ids:
            self._ok(f"{label}.building_id")
        else:
            self._fail(label, f"building_id='{bid}' is not a known building")

        for edge_kind in ("consumes", "produces"):
            edges = recipe.get(edge_kind, [])
            if not isinstance(edges, list):
                self._fail(
                    f"{label}.{edge_kind}",
                    f"must be a list, got {type(edges).__name__}",
                )
                continue
            for pos, edge in enumerate(edges):
                elabel = f"{label}.{edge_kind}[{pos}]"
                if not isinstance(edge, dict):
                    self._fail(elabel, "edge must be an object")
                    continue

                # The resource reference and the quantity are judged independently.
                rid = edge.get("resource")
                if not isinstance(rid, str):
                    self._fail(elabel, "missing/non-string resource")
                elif rid in resource_ids:
                    self._ok(f"{elabel}.resource")
                else:
                    self._fail(elabel, f"resource='{rid}' not declared in resources.json or typed-resources/processed.json")

                qty = edge.get("qty_per_turn")
                if isinstance(qty, int) and qty >= 1:
                    self._ok(f"{elabel}.qty_per_turn")
                else:
                    self._fail(elabel, f"qty_per_turn must be int >= 1, got {qty!r}")
def validate_building_requires_existing(self):
    """p1-43a: every `requires_existing` pointer must name a real building.

    Pointers are read from the shared ``buildings`` directory only; ids from
    the game-pack ``buildings`` directory are accepted as targets as well.
    Does nothing when the shared directory is absent.
    """
    shared_dir = self.resources / "buildings"
    if not shared_dir.exists():
        return

    known_ids = self._load_id_set_from_split_dir(shared_dir)
    # Also accept ids from any game-specific override dir, for completeness.
    pack_dir = self.game_data / "buildings"
    if pack_dir.exists():
        known_ids |= self._load_id_set_from_split_dir(pack_dir)

    print(f"\n building requires_existing cross-refs ({len(known_ids)} known ids)")
    not_data = ("manifest.json", "building_categories.json")
    for building_file in sorted(shared_dir.glob("*.json")):
        if building_file.name.endswith(".schema.json") or building_file.name in not_data:
            continue
        for label, entry in self._collect_entries_from_file(building_file):
            target = entry.get("requires_existing")
            if target is None:
                continue
            ref_label = f"{label}.requires_existing"
            if not isinstance(target, str):
                self._fail(ref_label, f"must be string|null, got {type(target).__name__}")
                continue
            if target in known_ids:
                self._ok(ref_label)
            else:
                self._fail(
                    ref_label,
                    f"requires_existing='{target}' does not resolve to a known building id",
                )
def validate_cross_refs(self):
    """Cross-reference checks: gates_* → units/buildings, collectibles → deposits.

    For every deposit in the registry, each id under ``gates_units`` /
    ``gates_buildings`` must be a known unit / building id. For every biome,
    each ``collectibles[].resource`` must be a registered deposit id.

    Malformed shapes (a non-list ``gates_*``, a non-string reference, a
    biome or collectible that is not an object) are recorded as failures
    instead of raising, because the deposit files read here are not
    schema-validated by any other check in this class. A null ``gates_*`` or
    ``collectibles`` value is treated as empty.
    """
    resources = self._load_resources()
    if resources is None:
        return
    unit_ids = (
        self._load_id_set_from_split_dir(self.resources / "units")
        | self._load_id_set_from_split_dir(self.game_data / "units")
    )
    building_ids = (
        self._load_id_set_from_split_dir(self.resources / "buildings")
        | self._load_id_set_from_split_dir(self.game_data / "buildings")
    )
    print("\n cross-reference checks")

    # (field on the deposit, ids it may reference, noun used in messages)
    gate_specs = (
        ("gates_units", unit_ids, "unit"),
        ("gates_buildings", building_ids, "building"),
    )
    for res_id, entry in resources.items():
        for field, known_ids, noun in gate_specs:
            gated = entry.get(field) or []
            if not isinstance(gated, list):
                self._fail(
                    f"resources/{res_id}",
                    f"{field} must be a list, got {type(gated).__name__}",
                )
                continue
            for ref in gated:
                # isinstance first: an unhashable ref would raise in the
                # set membership test.
                if not isinstance(ref, str) or ref not in known_ids:
                    self._fail(f"resources/{res_id}", f"{field} references unknown {noun} '{ref}'")
                else:
                    self._ok(f"resources/{res_id} → {noun}s/{ref}")

    # biomes.json: collectibles[].resource must exist in resources
    biomes = self._load_biomes()
    if biomes is None:
        return
    if not isinstance(biomes, list):
        self._fail("biomes.json", f"biome collection must be a list, got {type(biomes).__name__}")
        return
    for idx, biome in enumerate(biomes):
        if not isinstance(biome, dict):
            self._fail(f"biomes[{idx}]", "biome entry must be an object")
            continue
        biome_id = biome.get("id", "?")
        collectibles = biome.get("collectibles") or []
        if not isinstance(collectibles, list):
            self._fail(
                f"biomes/{biome_id}",
                f"collectibles must be a list, got {type(collectibles).__name__}",
            )
            continue
        for coll in collectibles:
            if not isinstance(coll, dict):
                self._fail(f"biomes/{biome_id}", "collectibles[] entry must be an object")
                continue
            res_ref = coll.get("resource", "")
            if not isinstance(res_ref, str) or res_ref not in resources:
                self._fail(
                    f"biomes/{biome_id}",
                    f"collectibles[].resource '{res_ref}' not found in deposits/",
                )
            else:
                self._ok(f"biomes/{biome_id} → resources/{res_ref}")
# ── p2-47: score.json ────────────────────────────────────────────
# Validates the composite score formula weights file that mc-score reads at
# startup. All seven weights must be present and non-negative; the file must
# parse against score.schema.json. No silent defaults — fail if absent.
REQUIRED_SCORE_WEIGHTS: tuple[str, ...] = (
    "w_pop", "w_cities", "w_tech", "w_culture",
    "w_land", "w_wonders", "w_military",
)


def validate_score(self) -> None:
    """score.json: all seven weights present, non-negative, schema-valid.

    A missing file is itself a failure. The per-weight checks run in
    addition to schema validation.
    """
    score_file = self.game_data / "score.json"
    if not score_file.exists():
        self._fail("score.json", f"missing at {score_file.relative_to(self.root)}")
        return
    document, problem = load_json_safe(score_file)
    if problem:
        self._fail("score.json", f"parse error: {problem}")
        return

    print("\n score.json")
    rel = str(score_file.relative_to(self.root))

    # Schema validation (skipped when the schema itself cannot be loaded).
    schema = self._load_schema("score")
    if schema is not None:
        self._validate_entry(schema, document, rel)

    # Per-weight presence + non-negative check (belt-and-suspenders over schema).
    weights = document.get("weights") if isinstance(document, dict) else None
    if not isinstance(weights, dict):
        self._fail(f"{rel}/weights", "must be an object")
        return
    for key in self.REQUIRED_SCORE_WEIGHTS:
        w_label = f"{rel}/weights/{key}"
        if key not in weights:
            self._fail(w_label, f"required weight '{key}' is absent")
            continue
        value = weights[key]
        if not isinstance(value, (int, float)):
            self._fail(w_label, f"must be a number, got {type(value).__name__}")
        elif value < 0:
            self._fail(w_label, f"weight must be >= 0, got {value}")
        else:
            self._ok(w_label)
# ── Observation gates (p2-61) ───────────────────────────────────
#
# public/resources/observation/gates.json maps tech IDs to lists of
# ObservationRecord field names. The Rust side (mc-observation::GatesDef)
# rejects unknown / innate fields at parse time; this validator cross-checks
# the same constraints from the content side and confirms each referenced
# tech ID exists in the tech registry.
# Canonical gateable field names — must match
# mc-observation::ObservationField::all() exactly.
OBSERVATION_GATEABLE_FIELDS = frozenset({
    "pressure", "humidity", "cape",
    "canopy_cover", "undergrowth", "fungi_network",
    "quality",
    "fish_stock", "reef_health",
    "habitat_suitability",
    "sulfate_aerosol",
})
# Innate fields are always recorded; listing them in gates.json is a bug.
OBSERVATION_INNATE_FIELDS = frozenset({
    "temperature", "moisture", "wind_speed", "wind_direction",
    "succession_progress",
})


def validate_observation_gates(self) -> None:
    """Validate observation/gates.json: tech ids and gated field names.

    The file must be an object whose ``techs`` member maps a tech id to a
    list of field names. Each tech id must exist among the ids found in the
    shared ``techs`` directory (this check is skipped when no ids were
    found), and each field must be gateable and not innate. A missing file
    is not an error.
    """
    gates_file = self.resources / "observation" / "gates.json"
    if not gates_file.exists():
        # File is optional during transition — skip silently rather than fail.
        return
    document, problem = load_json_safe(gates_file)
    if problem:
        self._fail("observation/gates.json", f"parse error: {problem}")
        return

    print("\n observation/gates.json")
    rel = str(gates_file.relative_to(self.root))
    if not isinstance(document, dict):
        self._fail(rel, "top-level must be an object")
        return
    techs = document.get("techs")
    if not isinstance(techs, dict):
        self._fail(f"{rel}/techs", "must be an object mapping tech_id -> [field]")
        return

    # Known tech ids: a tech file is either an array of entries or a single
    # entry object. Load errors are ignored here.
    known_tech_ids: set[str] = set()
    techs_dir = self.resources / "techs"
    if techs_dir.is_dir():
        for tech_file in sorted(techs_dir.glob("*.json")):
            loaded, _ = load_json_safe(tech_file)
            candidates = loaded if isinstance(loaded, list) else [loaded]
            for candidate in candidates:
                if isinstance(candidate, dict) and isinstance(candidate.get("id"), str):
                    known_tech_ids.add(candidate["id"])

    for tech_id, fields in techs.items():
        label = f"{rel}/techs/{tech_id}"
        if not isinstance(fields, list):
            self._fail(label, "value must be a list of field names")
            continue
        if known_tech_ids and tech_id not in known_tech_ids:
            # An unknown tech short-circuits: its fields are not inspected.
            self._fail(label, f"tech_id '{tech_id}' not present in resources/techs/*.json")
            continue
        for fname in fields:
            field_label = f"{label}[{fname}]"
            if not isinstance(fname, str):
                self._fail(field_label, "field name must be a string")
            elif fname in self.OBSERVATION_INNATE_FIELDS:
                self._fail(field_label, "innate fields are always recorded and must not be gated")
            elif fname not in self.OBSERVATION_GATEABLE_FIELDS:
                self._fail(field_label, f"unknown ObservationRecord field '{fname}'")
            else:
                self._ok(field_label)
# ── Main ─────────────────────────────────────────────────────────
def run(self):
    """Run every validation pass in a fixed order.

    Exits the process with status 1 up front when jsonschema is not
    installed; otherwise results accumulate on the instance.
    """
    if not HAS_JSONSCHEMA:
        print("ERROR: jsonschema not installed. Run: pip install jsonschema")
        sys.exit(1)
    print("── Age of Dwarves game data validation ──")

    shared, pack = self.resources, self.game_data

    # Single source of truth at resources/<category>/ post-p1-40 migration; the
    # game_data fallback paths remain for the transitional period and as a hook
    # for genuinely game-specific entities a game pack might author later.
    self.validate_split_dir("units", shared / "units", "unit")
    self.validate_split_dir("buildings", shared / "buildings", "building")
    for category, schema_name in (("units", "unit"), ("buildings", "building")):
        pack_dir = pack / category
        if pack_dir.exists():
            self.validate_split_dir(f"{category} (game-specific)", pack_dir, schema_name)

    # Techs migrated to resources/techs/ post-p1-40 single-source-of-truth;
    # validate the canonical location, then fall back to the legacy
    # game-specific dir if it still exists during transition.
    for heading, techs_dir in (
        ("techs", shared / "techs"),
        ("techs (game-specific)", pack / "techs"),
    ):
        if techs_dir.exists():
            self.validate_split_dir(heading, techs_dir, "tech")
    self.validate_tech_domains()

    self.validate_split_dir("terrain", pack / "terrain", "terrain")
    self.validate_single_file(
        "races.json", pack / "races.json", "race", wrap_key="races"
    )
    self.validate_single_file(
        "ai_personalities.json", pack / "ai_personalities.json", "ai_personality"
    )

    # The remaining passes take no arguments; the order is significant for
    # the printed report.
    remaining_passes = (
        self.validate_wilds,
        self.validate_promotions,
        self.validate_improvements,
        self.validate_specialists,
        self.validate_biomes,
        self.validate_deposit_concept_refs,
        self.validate_resources_kind,
        self.validate_recipes,
        self.validate_guide_data,
        self.validate_building_requires_existing,
        self.validate_cross_refs,
        self.validate_score,
        self.validate_observation_gates,
    )
    for validation_pass in remaining_passes:
        validation_pass()
def report(self) -> int:
    """Print the summary and return the process exit code.

    At most the first 30 failure messages are listed. Returns 1 when any
    check failed, otherwise 0.
    """
    rule = "=" * 60
    shown_limit = 30

    print(f"\n{rule}")
    print(f" PASSED: {self.passed} FAILED: {self.failed}")
    if self.errors:
        print("\n Failures:")
        for message in self.errors[:shown_limit]:
            print(f" {message}")
        hidden = len(self.errors) - shown_limit
        if hidden > 0:
            print(f" ... and {hidden} more")
    print(rule)

    if self.failed > 0:
        return 1
    return 0
def _run_self_test():
    """Run the golden bad-data scenarios; exit with status 1 on the first miss.

    Each scenario builds a throw-away project tree containing one
    deliberately broken fixture and checks that the matching validator
    reports it:

      1. biome collectibles that reference an unknown or game_2 deposit;
      2. an ai_personality entry missing ``trade_willingness`` (only when
         the real schema file is available);
      3. a deposit whose ``concept_resource`` names an unknown concept;
      4. a building with a dangling ``requires_existing`` pointer (p1-43a);
      5. a promotions.json lacking the ``trees`` key (p2-44a).

    Fixture trees are created with ``tempfile.TemporaryDirectory`` so they
    are deleted afterwards, including when a scenario aborts via
    ``sys.exit``.
    """
    import tempfile

    real_schema_dir = (
        Path(__file__).parent.parent
        / "public" / "games" / "age-of-dwarves" / "data" / "schemas"
    )

    # Scenarios 1-3 share one fixture tree.
    with tempfile.TemporaryDirectory() as tmp:
        root = Path(tmp)
        game_data = root / "public" / "games" / "age-of-dwarves" / "data"
        schema_dir = game_data / "schemas"
        deposits_dir = root / "public" / "resources" / "deposits"
        (game_data / "units").mkdir(parents=True)
        (game_data / "buildings").mkdir(parents=True)
        (root / "public" / "resources" / "improvements").mkdir(parents=True)
        (root / "public" / "resources" / "wilds").mkdir(parents=True)
        deposits_dir.mkdir(parents=True)
        schema_dir.mkdir(parents=True)

        # Minimal deposit registry: only "grain" is a valid Game 1 deposit.
        # "magesteel_ore" has scope=game_2 so it must be excluded from the registry.
        (deposits_dir / "grain.json").write_text(json.dumps(
            {"id": "grain", "name": "Grain", "tier": 1, "terrains": ["plains"]}
        ))
        (deposits_dir / "magesteel_ore.json").write_text(json.dumps(
            {"id": "magesteel_ore", "name": "Magesteel Ore", "tier": 5,
             "terrains": [], "scope": "game_2"}
        ))

        # Biome with three collectible refs: grain (valid), bad_resource (invalid),
        # magesteel_ore (game_2-scoped → also invalid for cross-ref).
        (game_data / "biomes.json").write_text(json.dumps([
            {"id": "plains", "name": "Plains", "collectibles": [
                {"resource": "grain", "weight": 0.8},
                {"resource": "bad_resource", "weight": 0.3},
                {"resource": "magesteel_ore", "weight": 0.1},
            ]},
        ]))

        # Copy the real biome schema into the fixture tree when it is available.
        for s in ["biome.schema.json"]:
            src = real_schema_dir / s
            if src.exists():
                (schema_dir / s).write_text(src.read_text())

        v = GameDataValidator(root, verbose=False)
        v.validate_cross_refs()
        if v.failed == 0:
            print("SELF-TEST FAILED: validator did not catch invalid collectible deposit refs")
            sys.exit(1)
        bad_ref_caught = any("bad_resource" in e for e in v.errors)
        if not bad_ref_caught:
            print(f"SELF-TEST FAILED: expected 'bad_resource' error, got: {v.errors}")
            sys.exit(1)
        # magesteel_ore is game_2-scoped → excluded from registry → also caught
        magesteel_caught = any("magesteel_ore" in e for e in v.errors)
        if not magesteel_caught:
            print(f"SELF-TEST FAILED: game_2 deposit 'magesteel_ore' should be caught, got: {v.errors}")
            sys.exit(1)
        print("SELF-TEST PASSED: invalid and game_2-scoped collectible deposit refs correctly caught")

        # Self-test: missing trade_willingness on an ai_personality entry must
        # fail schema validation.
        real_ai_schema = real_schema_dir / "ai_personality.schema.json"
        if real_ai_schema.exists():
            ai_schema = json.loads(real_ai_schema.read_text())
            bad_personality = {
                "id": "test_clan",
                "name": "Test Clan",
                "strategic_axes": {
                    "aggression": 5, "expansion": 5, "production": 5, "wealth": 5,
                    # trade_willingness intentionally omitted
                    "grudge_persistence": 5
                },
                "preferred_early_builds": ["warrior"]
            }
            v2 = GameDataValidator(root, verbose=False)
            v2._validate_entry(ai_schema, bad_personality, "test/missing_trade_willingness")
            if v2.failed == 0:
                print("SELF-TEST FAILED: missing trade_willingness should have failed schema validation")
                sys.exit(1)
            print("SELF-TEST PASSED: missing trade_willingness correctly caught by ai_personality schema")

        # Self-test: deposit with concept_resource pointing to unknown concept → caught.
        test_deposits_dir = root / "public" / "resources" / "deposits"
        test_deposits_dir.mkdir(parents=True, exist_ok=True)
        (test_deposits_dir / "bad_gem.json").write_text(json.dumps(
            {"id": "bad_gem", "name": "Bad Gem", "tier": 3, "terrains": ["hills"],
             "scope": "game_1", "concept_resource": "nonexistent_concept"}
        ))
        (root / "public" / "resources" / "resources.json").write_text(json.dumps(
            {"luxury": [{"id": "gems", "name": "Gems", "category": "luxury"}]}
        ))
        v3 = GameDataValidator(root, verbose=False)
        v3.validate_deposit_concept_refs()
        if v3.failed == 0:
            print("SELF-TEST FAILED: bad concept_resource 'nonexistent_concept' should have been caught")
            sys.exit(1)
        if not any("nonexistent_concept" in e for e in v3.errors):
            print(f"SELF-TEST FAILED: expected nonexistent_concept error, got: {v3.errors}")
            sys.exit(1)
        print("SELF-TEST PASSED: unknown concept_resource correctly caught by deposit concept ref check")

    # Self-test (p1-43a): a building declaring `requires_existing: "nonexistent"`
    # must be caught by validate_building_requires_existing.
    with tempfile.TemporaryDirectory() as tmp:
        test_root = Path(tmp)
        test_bld_dir = test_root / "public" / "resources" / "buildings"
        test_bld_dir.mkdir(parents=True)
        (test_bld_dir / "real.json").write_text(json.dumps(
            {"id": "real", "name": "Real", "placement": "city", "category": "infrastructure",
             "cost": 50, "upkeep": 0}
        ))
        (test_bld_dir / "broken.json").write_text(json.dumps(
            {"id": "broken", "name": "Broken", "placement": "city", "category": "infrastructure",
             "cost": 100, "upkeep": 0, "requires_existing": "nonexistent"}
        ))
        v4 = GameDataValidator(test_root, verbose=False)
        v4.validate_building_requires_existing()
        if v4.failed == 0:
            print("SELF-TEST FAILED: requires_existing='nonexistent' should have been caught")
            sys.exit(1)
        if not any("nonexistent" in e for e in v4.errors):
            print(f"SELF-TEST FAILED: expected nonexistent error, got: {v4.errors}")
            sys.exit(1)
        print("SELF-TEST PASSED: dangling requires_existing pointer correctly caught (p1-43a)")

    # Self-test (p2-44a): promotions.json missing the required `trees` key
    # must be caught by validate_promotions.
    with tempfile.TemporaryDirectory() as tmp:
        promo_root = Path(tmp)
        promo_dir = promo_root / "public" / "resources" / "promotions"
        promo_dir.mkdir(parents=True)
        (promo_dir / "promotions.json").write_text(json.dumps(
            {"xp_thresholds": [10, 30, 60], "heal_on_promote_percent": 30}
        ))
        v5 = GameDataValidator(promo_root, verbose=False)
        v5.validate_promotions()
        if v5.failed == 0:
            print("SELF-TEST FAILED: promotions.json missing 'trees' should have been caught")
            sys.exit(1)
        if not any("trees" in e for e in v5.errors):
            print(f"SELF-TEST FAILED: expected 'trees' error, got: {v5.errors}")
            sys.exit(1)
        print("SELF-TEST PASSED: promotions.json missing 'trees' correctly caught (p2-44a)")
def main():
    """Command-line entry point: parse the flags, then validate or self-test.

    Always ends through ``sys.exit``: status 0 after a completed self-test,
    otherwise the code returned by ``GameDataValidator.report``.
    """
    default_root = Path(__file__).parent.parent
    cli = argparse.ArgumentParser(description="Validate Age of Dwarves game pack JSON data")
    cli.add_argument("--root", type=Path, default=default_root,
                     help="Project root directory")
    cli.add_argument("--verbose", action="store_true", help="Show individual pass results")
    cli.add_argument("--self-test", action="store_true",
                     help="Run golden bad-data test and exit")
    options = cli.parse_args()

    if options.self_test:
        _run_self_test()
        sys.exit(0)

    validator = GameDataValidator(options.root, verbose=options.verbose)
    validator.run()
    sys.exit(validator.report())


if __name__ == "__main__":
    main()