#!/usr/bin/env python3 """Validate Age of Dwarves game pack JSON files against their schemas. Schemas live in: public/games/age-of-dwarves/data/schemas/*.schema.json Data sources: - Split dirs: data/units/, data/buildings/, data/techs/, data/terrain/ - Single files: data/races.json, data/ai_personalities.json - Resources: public/resources/wilds/wilds.json - Manifested: data/improvements/ manifest references public/resources/improvements/ Usage: python3 tools/validate-game-data.py [--root /path/to/project] [--verbose] Exit code 0 = all pass, 1 = failures found. """ import argparse import json import sys from pathlib import Path try: from jsonschema import Draft202012Validator, ValidationError HAS_JSONSCHEMA = True except ImportError: HAS_JSONSCHEMA = False def load_json(path: Path): try: return json.loads(path.read_text()) except json.JSONDecodeError as e: return None, str(e) return None, "unknown error" def load_json_safe(path: Path): try: data = json.loads(path.read_text()) return data, None except json.JSONDecodeError as e: return None, str(e) class GameDataValidator: def __init__(self, root: Path, verbose: bool = False): self.root = root self.game_data = root / "public" / "games" / "age-of-dwarves" / "data" self.resources = root / "public" / "resources" self.schema_dir = self.game_data / "schemas" self.verbose = verbose self.passed = 0 self.failed = 0 self.errors: list[str] = [] def _ok(self, label: str): self.passed += 1 if self.verbose: print(f" PASS {label}") def _fail(self, label: str, reason: str): self.failed += 1 msg = f"FAIL {label}: {reason}" self.errors.append(msg) print(f" {msg}") def _load_schema(self, name: str): path = self.schema_dir / f"{name}.schema.json" data, err = load_json_safe(path) if err: self._fail(f"schema/{name}", f"parse error: {err}") return None return data def _validate_entry(self, schema, entry: dict, label: str): if not HAS_JSONSCHEMA: return validator = Draft202012Validator(schema) errs = list(validator.iter_errors(entry)) if 
errs: for e in errs[:2]: path = ".".join(str(p) for p in e.absolute_path) or "(root)" self._fail(label, f"{path}: {e.message}") else: self._ok(label) def _collect_entries_from_file(self, path: Path) -> list[tuple[str, dict]]: """Extract (label, entry_dict) pairs from a JSON file, handling all DataLoader shapes.""" data, err = load_json_safe(path) if err: self._fail(str(path.relative_to(self.root)), f"parse error: {err}") return [] rel = path.relative_to(self.root) if isinstance(data, list): return [(f"{rel}[{i}]", e) for i, e in enumerate(data) if isinstance(e, dict)] if isinstance(data, dict): # Top-level dict with single "id" = single entry if "id" in data and isinstance(data["id"], str): return [(str(rel), data)] # Keyed collection (ai_personalities shape: {clan_id: {id, name, ...}}) results = [] for key, val in data.items(): if isinstance(val, dict) and ("id" in val or "name" in val): results.append((f"{rel}/{key}", val)) if results: return results # Wrapped array: {"races": [...], "terrains": [...], ...} for wrap_key, wrap_val in data.items(): if isinstance(wrap_val, list): return [(f"{rel}[{i}]", e) for i, e in enumerate(wrap_val) if isinstance(e, dict)] return [] # ── Category validators ─────────────────────────────────────────── def validate_split_dir(self, category_label: str, dir_path: Path, schema_name: str): schema = self._load_schema(schema_name) if schema is None: return files = sorted(f for f in dir_path.glob("*.json") if not f.name.endswith(".schema.json") and f.name not in ("manifest.json", "building_categories.json")) if not files: print(f" (no files in {dir_path.relative_to(self.root)})") return print(f"\n {category_label} ({len(files)} files)") for f in files: for label, entry in self._collect_entries_from_file(f): self._validate_entry(schema, entry, label) def validate_single_file(self, label: str, path: Path, schema_name: str, wrap_key: str | None = None): schema = self._load_schema(schema_name) if schema is None: return data, err = 
load_json_safe(path) if err: self._fail(label, f"parse error: {err}") return rel = path.relative_to(self.root) print(f"\n {label}") # Unwrap a top-level key if given (e.g. {"races": [...]}) entries_data = data if wrap_key and isinstance(data, dict) and wrap_key in data: entries_data = data[wrap_key] if isinstance(entries_data, list): for i, entry in enumerate(entries_data): if isinstance(entry, dict): self._validate_entry(schema, entry, f"{rel}[{i}]") elif isinstance(entries_data, dict): # Dict-of-dicts (ai_personalities shape) for key, val in entries_data.items(): if isinstance(val, dict): self._validate_entry(schema, val, f"{rel}/{key}") else: self._fail(label, "unexpected top-level type") def validate_wilds(self): schema = self._load_schema("wilds") if schema is None: return path = self.resources / "wilds" / "wilds.json" data, err = load_json_safe(path) if err: self._fail("wilds.json", f"parse error: {err}") return rel = path.relative_to(self.root) print(f"\n wilds") # wilds.json shape: {"wilds": { ... single config object ... }} inner = data.get("wilds", data) if isinstance(data, dict) else data if isinstance(inner, dict): self._validate_entry(schema, inner, str(rel)) else: self._fail(str(rel), "unexpected shape") # ── p2-44a: promotions.json shape ───────────────────────────────── # `promotions.json` is a config-shaped file consumed by DataLoader's raw # path: top-level `trees` dict (keyed by combat-class tree id), an # `xp_thresholds` array, and a `heal_on_promote_percent` integer. The # AI promotion picker (`mc-ai`) and the human modal (`promotion_picker.gd`) # both walk `trees`. Drift in this shape silently breaks the entire # promotion path — no `unit_promoted` events fire — so we lock it down. REQUIRED_PROMOTION_TREES: tuple[str, ...] 
= ("melee", "ranged", "siege") def validate_promotions(self): path = self.resources / "promotions" / "promotions.json" if not path.exists(): self._fail("promotions.json", f"missing at {path.relative_to(self.root)}") return data, err = load_json_safe(path) if err: self._fail("promotions.json", f"parse error: {err}") return rel = str(path.relative_to(self.root)) print(f"\n promotions.json") if not isinstance(data, dict): self._fail(rel, "top-level must be a JSON object") return for required_key in ("trees", "xp_thresholds", "heal_on_promote_percent"): if required_key not in data: self._fail(rel, f"missing required key '{required_key}'") else: self._ok(f"{rel}/{required_key}") trees = data.get("trees") if not isinstance(trees, dict): self._fail(f"{rel}/trees", "must be an object keyed by tree id") return for tree_id in self.REQUIRED_PROMOTION_TREES: if tree_id not in trees: self._fail(f"{rel}/trees", f"missing required combat-class tree '{tree_id}'") else: tree = trees[tree_id] if not isinstance(tree, dict) or not isinstance(tree.get("levels"), list): self._fail(f"{rel}/trees/{tree_id}", "tree must have a 'levels' array") else: self._ok(f"{rel}/trees/{tree_id}") xp = data.get("xp_thresholds") if not (isinstance(xp, list) and xp and all(isinstance(n, (int, float)) for n in xp)): self._fail(f"{rel}/xp_thresholds", "must be a non-empty array of numbers") else: self._ok(f"{rel}/xp_thresholds[shape]") # ── p1-55: TechDomain enum membership ──────────────────────────── # The canonical 10-value list mirrors `mc_core::TechDomain::ALL`. If # the Rust enum gains/loses a variant, update this list AND the test # in `mc-tech/src/web.rs::all_authored_techs_have_valid_domain`. TECH_DOMAINS: tuple[str, ...] = ( "Military", "Economy", "Industry", "Agriculture", "Governance", "Culture", "Science", "Exploration", "Engineering", "Medicine", ) def validate_tech_domains(self): """Every authored tech in the canonical SSoT must declare `domain` ∈ canonical 10-value enum. 
Legacy `game_data/techs` is intentionally skipped: per the post-p1-40 architecture, the single source of truth lives at `public/resources/techs/`. """ tech_dirs: list[Path] = [] if (self.resources / "techs").exists(): tech_dirs.append(self.resources / "techs") if not tech_dirs: return print(f"\n tech domain enum membership ({len(self.TECH_DOMAINS)} canonical values)") valid = set(self.TECH_DOMAINS) for tdir in tech_dirs: for f in sorted(tdir.glob("*.json")): if f.name in ("manifest.json",) or f.name.endswith(".schema.json"): continue for label, entry in self._collect_entries_from_file(f): domain = entry.get("domain") if domain is None: self._fail(label, "tech missing required `domain` field") continue if not isinstance(domain, str) or domain not in valid: self._fail( label, f"tech.domain={domain!r} not in canonical TechDomain enum {sorted(valid)}", ) else: self.passed += 1 def validate_improvements(self): """Improvements live in public/resources/improvements/ (not the game data dir).""" schema = self._load_schema("improvement") if schema is None: return imp_dir = self.resources / "improvements" files = sorted(f for f in imp_dir.glob("*.json") if not f.name.endswith(".schema.json") and f.name not in ("manifest.json", "improvements.json", "registry.md")) print(f"\n improvements ({len(files)} files)") for f in files: for label, entry in self._collect_entries_from_file(f): self._validate_entry(schema, entry, label) def validate_specialists(self): """Specialists live in public/resources/specialists/. 
Every entry must carry a `category` matching the mc_core::WorkerCategory enum (sustenance | construction | wealth).""" schema = self._load_schema("specialist") if schema is None: return spec_dir = self.resources / "specialists" if not spec_dir.exists(): print("\n public/resources/specialists/ (not present — skipping)") return files = sorted(f for f in spec_dir.glob("*.json") if not f.name.endswith(".schema.json") and f.name not in ("manifest.json", "registry.md")) print(f"\n specialists ({len(files)} files)") valid_categories = {"sustenance", "construction", "wealth"} for f in files: for label, entry in self._collect_entries_from_file(f): self._validate_entry(schema, entry, label) cat = entry.get("category") cat_label = f"{label}.category" if cat is None: self._fail(cat_label, "specialist missing required `category`") elif cat not in valid_categories: self._fail(cat_label, f"category={cat!r} not in WorkerCategory {sorted(valid_categories)}") else: self._ok(cat_label) def _load_id_set_from_split_dir(self, dir_path: Path) -> set[str]: """Collect all 'id' values from every JSON entry in a split directory.""" ids: set[str] = set() for f in dir_path.glob("*.json"): if f.name in ("manifest.json", "building_categories.json") or f.name.endswith(".schema.json"): continue for _label, entry in self._collect_entries_from_file(f): if "id" in entry: ids.add(entry["id"]) return ids def _load_resources(self) -> dict[str, dict] | None: """Load the resource/deposit registry from public/resources/deposits/*.json. Entries with scope=="game_2" are excluded — they gate magic units/buildings that don't exist in Game 1 and would produce false cross-ref failures. Returns None with a notice if the directory is absent. 
""" deposits_dir = self.resources / "deposits" if not deposits_dir.exists(): print("\n public/resources/deposits/ (not present — skipping cross-reference pass)") return None result: dict[str, dict] = {} for f in sorted(deposits_dir.glob("*.json")): if f.name in ("deposits.schema.json", "manifest.json", "registry.md", "deposit_categories.json"): continue data, err = load_json_safe(f) if err or not isinstance(data, dict) or "id" not in data: continue if data.get("scope") == "game_2": continue result[data["id"]] = data return result def _load_biomes(self) -> list[dict] | None: path = self.game_data / "biomes.json" if not path.exists(): print("\n biomes.json (not present — skipping biome cross-reference pass)") return None data, err = load_json_safe(path) if err: self._fail("biomes.json", f"parse error: {err}") return None return data if isinstance(data, list) else data.get("biomes", []) def validate_biomes(self): path = self.game_data / "biomes.json" if not path.exists(): return schema = self._load_schema("biome") if schema is None: return self.validate_single_file("biomes.json", path, "biome", wrap_key="biomes") def _load_concept_ids(self) -> set[str] | None: """Load economic concept IDs from public/resources/resources.json.""" path = self.resources / "resources.json" if not path.exists(): return None data, err = load_json_safe(path) if err: return None ids: set[str] = set() if isinstance(data, dict): for cat_entries in data.values(): if isinstance(cat_entries, list): for entry in cat_entries: if isinstance(entry, dict) and "id" in entry: ids.add(entry["id"]) return ids def validate_deposit_concept_refs(self): """Every Game-1 deposit must have a non-null concept_resource in resources.json.""" concept_ids = self._load_concept_ids() if concept_ids is None: print("\n public/resources/resources.json (not present — skipping concept_resource pass)") return deposits_dir = self.resources / "deposits" if not deposits_dir.exists(): return skip = {"deposit_categories.json", 
"deposits.schema.json", "registry.md", "magical.json", "marine.json", "mineral.json", "organic.json"} files = sorted(f for f in deposits_dir.glob("*.json") if f.name not in skip) print(f"\n deposit concept_resource refs ({len(files)} files)") for f in files: data, err = load_json_safe(f) if err or not isinstance(data, dict): continue dep_id = data.get("id", f.stem) scope = data.get("scope", "game_1") concept = data.get("concept_resource", "__missing__") label = f"deposits/{dep_id}.concept_resource" if concept == "__missing__": self._fail(label, "field absent — add concept_resource") elif scope == "game_2": self._ok(label) # null allowed for game_2 deposits elif concept is None: self._fail(label, "null concept_resource on a game_1 deposit") elif concept not in concept_ids: self._fail(label, f"'{concept}' not found in resources.json") else: self._ok(label) def validate_guide_data(self): """Validate the four guide-consumed JSON files extracted from hardcoded page enums (p2-32). Each has a minimal schema in data/schemas/.""" print("\n guide-data enums") # homepage-features.json: {"features": [card, ...]} schema = self._load_schema("homepage-features") if schema is not None: path = self.game_data / "homepage-features.json" data, err = load_json_safe(path) if err: self._fail("homepage-features.json", f"parse error: {err}") else: rel = path.relative_to(self.root) for i, card in enumerate(data.get("features", [])): self._validate_entry(schema, card, f"{rel}[features][{i}]") # map-topologies.json: {"topologies": [topology, ...]} schema = self._load_schema("map-topology") if schema is not None: path = self.game_data / "map-topologies.json" data, err = load_json_safe(path) if err: self._fail("map-topologies.json", f"parse error: {err}") else: rel = path.relative_to(self.root) for i, topo in enumerate(data.get("topologies", [])): self._validate_entry(schema, topo, f"{rel}[topologies][{i}]") # episodes/ep1-systems.json: whole-file wrapper validation schema = 
self._load_schema("episode-systems") if schema is not None: path = self.game_data / "episodes" / "ep1-systems.json" data, err = load_json_safe(path) if err: self._fail("episodes/ep1-systems.json", f"parse error: {err}") else: rel = path.relative_to(self.root) self._validate_entry(schema, data, str(rel)) # shipping-roadmap.json: whole-file wrapper validation schema = self._load_schema("shipping-roadmap") if schema is not None: path = self.game_data / "shipping-roadmap.json" data, err = load_json_safe(path) if err: self._fail("shipping-roadmap.json", f"parse error: {err}") else: rel = path.relative_to(self.root) self._validate_entry(schema, data, str(rel)) def validate_resources_kind(self): """Every resource in public/resources/resources.json must declare a `kind` ∈ {raw, processed} per the production-chain taxonomy (objective p2-57a). Mirrors `mc_core::ResourceKind`. """ path = self.resources / "resources.json" if not path.exists(): return data, err = load_json_safe(path) if err or not isinstance(data, dict): return valid_kinds = {"raw", "processed"} print("\n resources.json kind enum membership") for cat in ("bonus", "luxury", "strategic"): for entry in data.get(cat, []): if not isinstance(entry, dict): continue rid = entry.get("id", "?") kind = entry.get("kind") label = f"resources/{rid}.kind" if kind is None: self._fail(label, "resource missing required `kind` field") elif kind not in valid_kinds: self._fail( label, f"kind={kind!r} not in canonical ResourceKind {sorted(valid_kinds)}", ) else: self._ok(label) def validate_recipes(self): """p2-57: cross-ref public/resources/recipes/recipes.json against the known building ids and the union of raw resource ids (`public/resources/resources.json`) plus processed ids (`public/resources/typed-resources/processed.json`). Fails on any undeclared resource or unknown building. 
""" recipes_path = self.resources / "recipes" / "recipes.json" if not recipes_path.exists(): return data, err = load_json_safe(recipes_path) if err or not isinstance(data, dict): self._fail("recipes/recipes.json", f"parse error: {err or 'wrong shape'}") return # Known building ids. bdir = self.resources / "buildings" building_ids = self._load_id_set_from_split_dir(bdir) if bdir.exists() else set() if (self.game_data / "buildings").exists(): building_ids |= self._load_id_set_from_split_dir(self.game_data / "buildings") # Known resource ids: raws from resources.json + processed from # typed-resources/processed.json. resource_ids: set[str] = set() raw_path = self.resources / "resources.json" if raw_path.exists(): raw_data, _ = load_json_safe(raw_path) if isinstance(raw_data, dict): for cat in ("bonus", "luxury", "strategic"): for entry in raw_data.get(cat, []): if isinstance(entry, dict) and "id" in entry: resource_ids.add(entry["id"]) proc_path = self.resources / "typed-resources" / "processed.json" if proc_path.exists(): proc_data, _ = load_json_safe(proc_path) if isinstance(proc_data, dict): for entry in proc_data.get("processed", []): if isinstance(entry, dict) and "id" in entry: resource_ids.add(entry["id"]) print( f"\n recipe cross-refs ({len(building_ids)} buildings, " f"{len(resource_ids)} resource ids)" ) recipes = data.get("recipes", []) if not isinstance(recipes, list): self._fail("recipes/recipes.json", "`recipes` must be a list") return for idx, recipe in enumerate(recipes): if not isinstance(recipe, dict): self._fail(f"recipes[{idx}]", f"must be object, got {type(recipe).__name__}") continue bid = recipe.get("building_id") label = f"recipes/{bid or f''}" if not isinstance(bid, str): self._fail(label, "missing/non-string building_id") continue if bid not in building_ids: self._fail(label, f"building_id='{bid}' is not a known building") else: self._ok(f"{label}.building_id") for edge_kind in ("consumes", "produces"): edges = recipe.get(edge_kind, []) if 
not isinstance(edges, list): self._fail( f"{label}.{edge_kind}", f"must be a list, got {type(edges).__name__}", ) continue for j, edge in enumerate(edges): elabel = f"{label}.{edge_kind}[{j}]" if not isinstance(edge, dict): self._fail(elabel, "edge must be an object") continue rid = edge.get("resource") qty = edge.get("qty_per_turn") if not isinstance(rid, str): self._fail(elabel, "missing/non-string resource") elif rid not in resource_ids: self._fail(elabel, f"resource='{rid}' not declared in resources.json or typed-resources/processed.json") else: self._ok(f"{elabel}.resource") if not isinstance(qty, int) or qty < 1: self._fail(elabel, f"qty_per_turn must be int >= 1, got {qty!r}") else: self._ok(f"{elabel}.qty_per_turn") def validate_building_requires_existing(self): """p1-43a: every `requires_existing` ladder pointer must resolve to a real building id. Cross-refs `public/resources/buildings/*.json` only (post-p1-40 single source of truth).""" bdir = self.resources / "buildings" if not bdir.exists(): return building_ids = self._load_id_set_from_split_dir(bdir) # Also accept ids from any game-specific override dir, for completeness if (self.game_data / "buildings").exists(): building_ids |= self._load_id_set_from_split_dir(self.game_data / "buildings") print(f"\n building requires_existing cross-refs ({len(building_ids)} known ids)") for f in sorted(bdir.glob("*.json")): if f.name.endswith(".schema.json") or f.name in ("manifest.json", "building_categories.json"): continue for label, entry in self._collect_entries_from_file(f): prereq = entry.get("requires_existing") if prereq is None: continue ref_label = f"{label}.requires_existing" if not isinstance(prereq, str): self._fail(ref_label, f"must be string|null, got {type(prereq).__name__}") elif prereq not in building_ids: self._fail( ref_label, f"requires_existing='{prereq}' does not resolve to a known building id", ) else: self._ok(ref_label) def validate_cross_refs(self): """Cross-reference checks: collectibles 
→ resources, gates_* → units/buildings.""" resources = self._load_resources() if resources is None: return unit_ids = ( self._load_id_set_from_split_dir(self.resources / "units") | self._load_id_set_from_split_dir(self.game_data / "units") ) building_ids = ( self._load_id_set_from_split_dir(self.resources / "buildings") | self._load_id_set_from_split_dir(self.game_data / "buildings") ) print("\n cross-reference checks") # resources.json: gates_units / gates_buildings cross-refs for res_id, entry in resources.items(): for gated_unit in entry.get("gates_units", []): if gated_unit not in unit_ids: self._fail(f"resources/{res_id}", f"gates_units references unknown unit '{gated_unit}'") else: self._ok(f"resources/{res_id} → units/{gated_unit}") for gated_bld in entry.get("gates_buildings", []): if gated_bld not in building_ids: self._fail(f"resources/{res_id}", f"gates_buildings references unknown building '{gated_bld}'") else: self._ok(f"resources/{res_id} → buildings/{gated_bld}") # biomes.json: collectibles[].resource must exist in resources biomes = self._load_biomes() if biomes is None: return for biome in biomes: biome_id = biome.get("id", "?") for coll in biome.get("collectibles", []): res_ref = coll.get("resource", "") if res_ref not in resources: self._fail( f"biomes/{biome_id}", f"collectibles[].resource '{res_ref}' not found in deposits/", ) else: self._ok(f"biomes/{biome_id} → resources/{res_ref}") # ── p2-47: score.json ──────────────────────────────────────────── # Validates the composite score formula weights file that mc-score reads at # startup. All seven weights must be present and non-negative; the file must # parse against score.schema.json. No silent defaults — fail if absent. REQUIRED_SCORE_WEIGHTS: tuple[str, ...] 
= ( "w_pop", "w_cities", "w_tech", "w_culture", "w_land", "w_wonders", "w_military", ) def validate_score(self) -> None: """score.json: all seven weights present, non-negative, schema-valid.""" path = self.game_data / "score.json" if not path.exists(): self._fail("score.json", f"missing at {path.relative_to(self.root)}") return data, err = load_json_safe(path) if err: self._fail("score.json", f"parse error: {err}") return print("\n score.json") rel = str(path.relative_to(self.root)) # Schema validation. schema = self._load_schema("score") if schema is not None: self._validate_entry(schema, data, rel) # Per-weight presence + non-negative check (belt-and-suspenders over schema). weights = data.get("weights") if isinstance(data, dict) else None if not isinstance(weights, dict): self._fail(f"{rel}/weights", "must be an object") return for key in self.REQUIRED_SCORE_WEIGHTS: w_label = f"{rel}/weights/{key}" if key not in weights: self._fail(w_label, f"required weight '{key}' is absent") elif not isinstance(weights[key], (int, float)): self._fail(w_label, f"must be a number, got {type(weights[key]).__name__}") elif weights[key] < 0: self._fail(w_label, f"weight must be >= 0, got {weights[key]}") else: self._ok(w_label) # ── Observation gates (p2-61) ─────────────────────────────────── # # public/resources/observation/gates.json maps tech IDs to lists of # ObservationRecord field names. The Rust side (mc-observation::GatesDef) # rejects unknown / innate fields at parse time; this validator cross-checks # the same constraints from the content side and confirms each referenced # tech ID exists in the tech registry. # Canonical gateable field names — must match # mc-observation::ObservationField::all() exactly. 
OBSERVATION_GATEABLE_FIELDS = frozenset({ "pressure", "humidity", "cape", "canopy_cover", "undergrowth", "fungi_network", "quality", "fish_stock", "reef_health", "habitat_suitability", "sulfate_aerosol", }) # Innate fields are always recorded; listing them in gates.json is a bug. OBSERVATION_INNATE_FIELDS = frozenset({ "temperature", "moisture", "wind_speed", "wind_direction", "succession_progress", }) def validate_civics(self): """Validate p3-05b/c/d civics catalog files. Each axis lives under public/resources/civics//*.json. Files are single-entry objects (one civic per file). Schema enforces axis enum, snake_case ids, and the union of valid modifier keys. """ schema = self._load_schema("civic") if schema is None: return civics_root = self.resources / "civics" if not civics_root.exists(): print(f"\n (no civics dir at {civics_root.relative_to(self.root)})") return print(f"\n civics") seen_ids: dict[str, str] = {} for axis_dir in sorted(p for p in civics_root.iterdir() if p.is_dir()): axis_name = axis_dir.name files = sorted(f for f in axis_dir.glob("*.json") if not f.name.endswith(".schema.json")) for f in files: rel = str(f.relative_to(self.root)) data, err = load_json_safe(f) if err: self._fail(rel, f"parse error: {err}") continue self._validate_entry(schema, data, rel) # Cross-check directory vs declared axis. declared_axis = data.get("axis") if isinstance(data, dict) else None if declared_axis != axis_name: self._fail( rel, f"axis mismatch: file in {axis_name}/ declares axis={declared_axis!r}", ) # Id uniqueness across all axes. cid = data.get("id") if isinstance(data, dict) else None if isinstance(cid, str): if cid in seen_ids: self._fail( rel, f"duplicate civic id {cid!r} (also in {seen_ids[cid]})", ) else: seen_ids[cid] = rel def validate_observation_gates(self) -> None: path = self.resources / "observation" / "gates.json" if not path.exists(): # File is optional during transition — skip silently rather than fail. 
return data, err = load_json_safe(path) if err: self._fail("observation/gates.json", f"parse error: {err}") return print("\n observation/gates.json") rel = str(path.relative_to(self.root)) if not isinstance(data, dict): self._fail(rel, "top-level must be an object") return techs = data.get("techs") if not isinstance(techs, dict): self._fail(f"{rel}/techs", "must be an object mapping tech_id -> [field]") return # Build the set of known tech IDs from public/resources/techs/*.json. known_tech_ids: set[str] = set() techs_dir = self.resources / "techs" if techs_dir.is_dir(): for tf in sorted(techs_dir.glob("*.json")): tdata, _ = load_json_safe(tf) if isinstance(tdata, list): for entry in tdata: if isinstance(entry, dict) and isinstance(entry.get("id"), str): known_tech_ids.add(entry["id"]) elif isinstance(tdata, dict) and isinstance(tdata.get("id"), str): known_tech_ids.add(tdata["id"]) for tech_id, fields in techs.items(): label = f"{rel}/techs/{tech_id}" if not isinstance(fields, list): self._fail(label, "value must be a list of field names") continue if known_tech_ids and tech_id not in known_tech_ids: self._fail(label, f"tech_id '{tech_id}' not present in resources/techs/*.json") continue for fname in fields: fl = f"{label}[{fname}]" if not isinstance(fname, str): self._fail(fl, "field name must be a string") continue if fname in self.OBSERVATION_INNATE_FIELDS: self._fail(fl, "innate fields are always recorded and must not be gated") continue if fname not in self.OBSERVATION_GATEABLE_FIELDS: self._fail(fl, f"unknown ObservationRecord field '{fname}'") continue self._ok(fl) # ── Main ───────────────────────────────────────────────────────── def run(self): if not HAS_JSONSCHEMA: print("ERROR: jsonschema not installed. 
Run: pip install jsonschema") sys.exit(1) print("── Age of Dwarves game data validation ──") # Single source of truth at resources// post-p1-40 migration; the # game_data fallback paths remain for the transitional period and as a hook # for genuinely game-specific entities a game pack might author later. self.validate_split_dir("units", self.resources / "units", "unit") self.validate_split_dir("buildings", self.resources / "buildings", "building") if (self.game_data / "units").exists(): self.validate_split_dir("units (game-specific)", self.game_data / "units", "unit") if (self.game_data / "buildings").exists(): self.validate_split_dir("buildings (game-specific)", self.game_data / "buildings", "building") # Techs migrated to resources/techs/ post-p1-40 single-source-of-truth; # validate the canonical location, then fall back to the legacy # game-specific dir if it still exists during transition. if (self.resources / "techs").exists(): self.validate_split_dir("techs", self.resources / "techs", "tech") if (self.game_data / "techs").exists(): self.validate_split_dir("techs (game-specific)", self.game_data / "techs", "tech") self.validate_tech_domains() self.validate_split_dir("terrain", self.game_data / "terrain", "terrain") self.validate_single_file( "races.json", self.game_data / "races.json", "race", wrap_key="races" ) self.validate_single_file( "ai_personalities.json", self.game_data / "ai_personalities.json", "ai_personality" ) self.validate_wilds() self.validate_promotions() self.validate_improvements() self.validate_specialists() self.validate_biomes() self.validate_deposit_concept_refs() self.validate_resources_kind() self.validate_recipes() self.validate_guide_data() self.validate_building_requires_existing() self.validate_cross_refs() self.validate_score() self.validate_civics() self.validate_observation_gates() def report(self) -> int: print(f"\n{'=' * 60}") print(f" PASSED: {self.passed} FAILED: {self.failed}") if self.errors: print(f"\n Failures:") for e 
def _self_test_fail(message: str):
    """Print a self-test failure line and terminate the process with exit code 1."""
    print(f"SELF-TEST FAILED: {message}")
    sys.exit(1)


def _self_test_pass(message: str) -> None:
    """Print a self-test success line."""
    print(f"SELF-TEST PASSED: {message}")


def _self_test_write_json(path: Path, payload) -> None:
    """Serialize *payload* as JSON into *path* (UTF-8, independent of platform locale)."""
    path.write_text(json.dumps(payload), encoding="utf-8")


def _self_test_collectible_refs(root: Path, real_schema_dir: Path) -> None:
    """Scenario 1: biome collectibles referencing unknown / game_2-scoped deposits.

    Builds a minimal project tree under *root*, then runs
    ``GameDataValidator.validate_cross_refs`` and requires that both the
    nonexistent ``bad_resource`` and the game_2-scoped ``magesteel_ore``
    appear in the recorded errors. Exits the process with status 1 otherwise.
    """
    game_data = root / "public" / "games" / "age-of-dwarves" / "data"
    resources = root / "public" / "resources"
    schema_dir = game_data / "schemas"
    deposits_dir = resources / "deposits"
    for directory in (
        game_data / "units",
        game_data / "buildings",
        resources / "improvements",
        resources / "wilds",
        deposits_dir,
        schema_dir,
    ):
        directory.mkdir(parents=True, exist_ok=True)

    # Minimal deposit registry: only "grain" is a valid Game 1 deposit.
    # "magesteel_ore" has scope=game_2 so it must be excluded from the registry.
    _self_test_write_json(
        deposits_dir / "grain.json",
        {"id": "grain", "name": "Grain", "tier": 1, "terrains": ["plains"]},
    )
    _self_test_write_json(
        deposits_dir / "magesteel_ore.json",
        {"id": "magesteel_ore", "name": "Magesteel Ore", "tier": 5,
         "terrains": [], "scope": "game_2"},
    )

    # Biome with three collectible refs: grain (valid), bad_resource (invalid),
    # magesteel_ore (game_2-scoped → also invalid for cross-ref).
    _self_test_write_json(
        game_data / "biomes.json",
        [
            {"id": "plains", "name": "Plains", "collectibles": [
                {"resource": "grain", "weight": 0.8},
                {"resource": "bad_resource", "weight": 0.3},
                {"resource": "magesteel_ore", "weight": 0.1},
            ]},
        ],
    )

    # Copy the real biome schema needed for the cross-ref pass (if available).
    biome_schema = real_schema_dir / "biome.schema.json"
    if biome_schema.exists():
        (schema_dir / biome_schema.name).write_text(
            biome_schema.read_text(encoding="utf-8"), encoding="utf-8"
        )

    v = GameDataValidator(root, verbose=False)
    v.validate_cross_refs()
    if v.failed == 0:
        _self_test_fail("validator did not catch invalid collectible deposit refs")
    if not any("bad_resource" in e for e in v.errors):
        _self_test_fail(f"expected 'bad_resource' error, got: {v.errors}")
    # magesteel_ore is game_2-scoped → excluded from registry → also caught
    if not any("magesteel_ore" in e for e in v.errors):
        _self_test_fail(
            f"game_2 deposit 'magesteel_ore' should be caught, got: {v.errors}"
        )
    _self_test_pass("invalid and game_2-scoped collectible deposit refs correctly caught")


def _self_test_ai_personality(root: Path, real_schema_dir: Path) -> bool:
    """Scenario 2: an ai_personality entry missing ``trade_willingness`` must fail.

    Returns True when the check actually ran (and passed), False when it was
    skipped because ``jsonschema`` is unavailable or the real schema file is
    absent. Exits the process with status 1 if the bad entry is not rejected.
    """
    # _validate_entry is a no-op without jsonschema, so running the check would
    # report a misleading failure; skip explicitly instead.
    if not HAS_JSONSCHEMA:
        print("SELF-TEST SKIPPED: jsonschema not installed; "
              "ai_personality schema check not run")
        return False
    real_ai_schema = real_schema_dir / "ai_personality.schema.json"
    if not real_ai_schema.exists():
        print(f"SELF-TEST SKIPPED: {real_ai_schema} not found; "
              "ai_personality schema check not run")
        return False

    ai_schema = json.loads(real_ai_schema.read_text(encoding="utf-8"))
    bad_personality = {
        "id": "test_clan",
        "name": "Test Clan",
        "strategic_axes": {
            "aggression": 5,
            "expansion": 5,
            "production": 5,
            "wealth": 5,
            # trade_willingness intentionally omitted
            "grudge_persistence": 5,
        },
        "preferred_early_builds": ["warrior"],
    }
    v2 = GameDataValidator(root, verbose=False)
    v2._validate_entry(ai_schema, bad_personality, "test/missing_trade_willingness")
    if v2.failed == 0:
        _self_test_fail("missing trade_willingness should have failed schema validation")
    _self_test_pass("missing trade_willingness correctly caught by ai_personality schema")
    return True


def _self_test_deposit_concept_refs(root: Path) -> None:
    """Scenario 3: a deposit whose ``concept_resource`` names an unknown concept.

    Adds ``bad_gem.json`` and a one-entry ``resources.json`` under *root*, runs
    ``GameDataValidator.validate_deposit_concept_refs`` and requires an error
    mentioning ``nonexistent_concept``. Exits with status 1 otherwise.
    """
    resources = root / "public" / "resources"
    deposits_dir = resources / "deposits"
    deposits_dir.mkdir(parents=True, exist_ok=True)
    _self_test_write_json(
        deposits_dir / "bad_gem.json",
        {"id": "bad_gem", "name": "Bad Gem", "tier": 3, "terrains": ["hills"],
         "scope": "game_1", "concept_resource": "nonexistent_concept"},
    )
    _self_test_write_json(
        resources / "resources.json",
        {"luxury": [{"id": "gems", "name": "Gems", "category": "luxury"}]},
    )
    v3 = GameDataValidator(root, verbose=False)
    v3.validate_deposit_concept_refs()
    if v3.failed == 0:
        _self_test_fail("bad concept_resource 'nonexistent_concept' should have been caught")
    if not any("nonexistent_concept" in e for e in v3.errors):
        _self_test_fail(f"expected nonexistent_concept error, got: {v3.errors}")
    _self_test_pass("unknown concept_resource correctly caught by deposit concept ref check")


def _self_test_building_requires_existing(test_root: Path) -> None:
    """Scenario 4 (p1-43a): a building with a dangling ``requires_existing`` pointer.

    Writes one valid and one broken building under *test_root*, runs
    ``GameDataValidator.validate_building_requires_existing`` and requires an
    error mentioning ``nonexistent``. Exits with status 1 otherwise.
    """
    test_bld_dir = test_root / "public" / "resources" / "buildings"
    test_bld_dir.mkdir(parents=True, exist_ok=True)
    _self_test_write_json(
        test_bld_dir / "real.json",
        {"id": "real", "name": "Real", "placement": "city",
         "category": "infrastructure", "cost": 50, "upkeep": 0},
    )
    _self_test_write_json(
        test_bld_dir / "broken.json",
        {"id": "broken", "name": "Broken", "placement": "city",
         "category": "infrastructure", "cost": 100, "upkeep": 0,
         "requires_existing": "nonexistent"},
    )
    v4 = GameDataValidator(test_root, verbose=False)
    v4.validate_building_requires_existing()
    if v4.failed == 0:
        _self_test_fail("requires_existing='nonexistent' should have been caught")
    if not any("nonexistent" in e for e in v4.errors):
        _self_test_fail(f"expected nonexistent error, got: {v4.errors}")
    _self_test_pass("dangling requires_existing pointer correctly caught (p1-43a)")


def _self_test_promotions(promo_root: Path) -> None:
    """Scenario 5 (p2-44a): ``promotions.json`` missing the required ``trees`` key.

    Writes a promotions file without ``trees`` under *promo_root*, runs
    ``GameDataValidator.validate_promotions`` and requires an error mentioning
    ``trees``. Exits with status 1 otherwise.
    """
    promo_dir = promo_root / "public" / "resources" / "promotions"
    promo_dir.mkdir(parents=True, exist_ok=True)
    _self_test_write_json(
        promo_dir / "promotions.json",
        {"xp_thresholds": [10, 30, 60], "heal_on_promote_percent": 30},
    )
    v5 = GameDataValidator(promo_root, verbose=False)
    v5.validate_promotions()
    if v5.failed == 0:
        _self_test_fail("promotions.json missing 'trees' should have been caught")
    if not any("trees" in e for e in v5.errors):
        _self_test_fail(f"expected 'trees' error, got: {v5.errors}")
    _self_test_pass("promotions.json missing 'trees' correctly caught (p2-44a)")


def _run_self_test():
    """Golden bad-data tests: feed deliberately broken data to the validator.

    Runs five scenarios (dangling collectible deposit refs, ai_personality
    missing a required axis, unknown deposit concept_resource, dangling
    building ``requires_existing``, promotions missing ``trees``). Each
    scenario prints a ``SELF-TEST PASSED``/``SKIPPED`` line, or prints
    ``SELF-TEST FAILED`` and exits the process with status 1.

    All fixture trees live in temporary directories that are removed when the
    scenario finishes, including when a scenario exits via ``sys.exit``.
    """
    import tempfile

    real_schema_dir = (
        Path(__file__).parent.parent
        / "public" / "games" / "age-of-dwarves" / "data" / "schemas"
    )

    # The first three scenarios deliberately share one fixture tree, exactly as
    # the deposit-concept check builds on the deposits written by the first one.
    with tempfile.TemporaryDirectory() as tmp:
        root = Path(tmp)
        _self_test_collectible_refs(root, real_schema_dir)
        _self_test_ai_personality(root, real_schema_dir)
        _self_test_deposit_concept_refs(root)

    with tempfile.TemporaryDirectory() as tmp:
        _self_test_building_requires_existing(Path(tmp))

    with tempfile.TemporaryDirectory() as tmp:
        _self_test_promotions(Path(tmp))


def main():
    """CLI entry point: parse arguments, then run the self-test or the full validation.

    Always terminates via ``sys.exit``: 0 after a successful ``--self-test``,
    otherwise the code returned by ``GameDataValidator.report()``.
    """
    parser = argparse.ArgumentParser(
        description="Validate Age of Dwarves game pack JSON data"
    )
    parser.add_argument(
        "--root",
        type=Path,
        default=Path(__file__).parent.parent,
        help="Project root directory",
    )
    parser.add_argument(
        "--verbose", action="store_true", help="Show individual pass results"
    )
    parser.add_argument(
        "--self-test", action="store_true", help="Run golden bad-data test and exit"
    )
    args = parser.parse_args()

    if args.self_test:
        _run_self_test()
        sys.exit(0)

    v = GameDataValidator(args.root, verbose=args.verbose)
    v.run()
    sys.exit(v.report())


if __name__ == "__main__":
    main()