#!/usr/bin/env python3 """Ecology data cross-reference validator. Validates all cross-references between ecology JSON data files under public/resources/ecology/. Run after any data change to catch inconsistencies. Usage: python tools/validate-ecology-data.py [--root /path/to/project] """ import argparse import json import sys from pathlib import Path def load_json(path: Path) -> dict | list | None: try: return json.loads(path.read_text()) except (FileNotFoundError, json.JSONDecodeError) as e: return None class EcologyValidator: def __init__(self, root: Path): self.eco = root / "public" / "resources" / "ecology" self.worlds = root / "public" / "resources" / "worlds" self.passed = 0 self.failed = 0 self.errors: list[str] = [] self.biome_ids: set[str] = set() self.trait_enums: dict[str, set[str]] = {} self.all_trait_values: set[str] = set() def ok(self, msg: str) -> None: self.passed += 1 print(f" \u2713 {msg}") def fail(self, msg: str) -> None: self.failed += 1 self.errors.append(msg) print(f" \u2717 {msg}") def load_biome_ids(self) -> None: # ecology/biomes/ holds biome definitions in TWO shapes: an aggregate # file wrapping a `biomes` array, and per-biome singleton files shaped # `{ "id": ..., "name": ..., ... }`. Both are real biome definitions, so # the id universe is the union of both (mirrors the file-or-array # convention the runtime DataLoader merges). biome_dir = self.eco / "biomes" for f in sorted(biome_dir.glob("*.json")): if "schema" in f.name: continue data = load_json(f) if not data: continue if isinstance(data, dict) and "biomes" in data: for b in data["biomes"]: self.biome_ids.add(b["id"]) elif isinstance(data, dict) and "id" in data: self.biome_ids.add(data["id"]) # Worlds also declare runtime biome labels (e.g. `permanent_ice`, # `volcano`) that `normalizes_to` a canonical ecology biome. These are # valid keys for biome_trait_weights.json (the weights file already # carries other runtime labels like `ice`/`grassland`/`plains`), so the # id universe must include them. Without this the validator would flag # legitimate runtime-label weights as "unknown". if self.worlds.is_dir(): for f in sorted(self.worlds.glob("*/runtime_biomes.json")): data = load_json(f) if not data: continue runtime = data.get("runtime_biomes", data) if isinstance(data, dict) else data if isinstance(runtime, list): for b in runtime: if isinstance(b, dict) and "id" in b: self.biome_ids.add(b["id"]) elif isinstance(runtime, dict): self.biome_ids.update(runtime.keys()) def load_trait_enums(self) -> None: data = load_json(self.eco / "traits" / "trait_definitions.json") if not data: self.fail("trait_definitions.json: file missing or invalid") return for cat, info in data["categories"].items(): vals = set(info["values"]) self.trait_enums[cat] = vals self.all_trait_values.update(vals) def check_biome_trait_weight_ids(self) -> None: data = load_json(self.eco / "traits" / "biome_trait_weights.json") if not data: self.fail("biome_trait_weights.json: file missing or invalid") return bad = [bid for bid in data if bid not in self.biome_ids] if bad: self.fail(f"biome_trait_weights.json: unknown biome IDs: {bad}") else: self.ok(f"biome_trait_weights: all {len(data)} biome IDs valid") def check_biome_trait_weight_values(self) -> None: data = load_json(self.eco / "traits" / "biome_trait_weights.json") if not data: return bad = [] for biome, categories in data.items(): for cat, vals in categories.items(): if cat not in self.trait_enums: bad.append(f"{biome}: unknown category '{cat}'") continue for v in vals: if v not in self.trait_enums[cat]: bad.append(f"{biome}.{cat}: unknown value '{v}'") if bad: for b in bad: self.fail(f"biome_trait_weights.json: {b}") else: self.ok("biome_trait_weights: all trait values valid") def check_biome_trait_weight_sums(self) -> None: data = load_json(self.eco / "traits" / "biome_trait_weights.json") if not data: return bad = [] for biome, categories in data.items(): for cat, vals in categories.items(): total = sum(vals.values()) if total != 100: bad.append(f"{biome}.{cat} = {total}") if bad: for b in bad: self.fail(f"biome_trait_weights.json: sum != 100: {b}") else: self.ok(f"biome_trait_weights: all category sums = 100") def check_quality_ranges(self) -> None: biome_dir = self.eco / "biomes" bad = [] count = 0 for f in sorted(biome_dir.glob("*.json")): if "schema" in f.name: continue data = load_json(f) if not data or "biomes" not in data: continue for b in data["biomes"]: qr = b.get("quality_range", []) count += 1 if len(qr) != 2: bad.append(f"{b['id']} in {f.name}: quality_range has {len(qr)} elements") elif not (1 <= qr[0] <= qr[1] <= 10): bad.append(f"{b['id']} in {f.name}: quality_range {qr} violates 1 <= min <= max <= 10") if bad: for b in bad: self.fail(f"biomes: {b}") else: self.ok(f"biomes: all {count} quality_ranges valid [1..10]") def check_food_yield_keys(self) -> None: data = load_json(self.eco / "ecosystem" / "food_yield.json") if not data: self.fail("food_yield.json: file missing or invalid") return mults = data.get("quality_multipliers", {}) expected = {str(i) for i in range(1, 11)} actual = set(mults.keys()) if actual != expected: missing = expected - actual extra = actual - expected msg = [] if missing: msg.append(f"missing: {sorted(missing)}") if extra: msg.append(f"extra: {sorted(extra)}") self.fail(f"food_yield.json: quality_multipliers keys: {', '.join(msg)}") else: self.ok("food_yield.json: quality_multipliers has keys 1-10") def check_ability_ids_unique(self) -> None: data = load_json(self.eco / "fauna" / "abilities.json") if not data: self.fail("abilities.json: file missing or invalid") return ids = [] for section in ["trait_based", "tier_gated"]: for a in data.get(section, []): ids.append(a["id"]) seen = set() dupes = [] for aid in ids: if aid in seen: dupes.append(aid) seen.add(aid) if dupes: self.fail(f"abilities.json: duplicate IDs: {dupes}") else: self.ok(f"abilities.json: all {len(ids)} ability IDs unique") def _check_trait_refs(self, applies_to: dict, context: str) -> list[str]: bad = [] for cat, vals in applies_to.items(): if cat == "min_tier": continue if cat not in self.trait_enums: bad.append(f"{context}: unknown category '{cat}'") continue if isinstance(vals, list): for v in vals: if v not in self.trait_enums[cat]: bad.append(f"{context}: unknown {cat} value '{v}'") return bad def check_behavior_trait_refs(self) -> None: data = load_json(self.eco / "fauna" / "behaviors.json") if not data: self.fail("behaviors.json: file missing or invalid") return bad = [] for b in data.get("behaviors", []): bad.extend(self._check_trait_refs(b.get("applies_to", {}), f"behavior '{b['id']}'")) if bad: for b in bad: self.fail(f"behaviors.json: {b}") else: self.ok(f"behaviors.json: all trait references valid") def check_ability_trait_refs(self) -> None: data = load_json(self.eco / "fauna" / "abilities.json") if not data: return bad = [] for section in ["trait_based", "tier_gated"]: for a in data.get(section, []): bad.extend(self._check_trait_refs(a.get("trigger", {}), f"ability '{a['id']}'")) if bad: for b in bad: self.fail(f"abilities.json: {b}") else: self.ok("abilities.json: all trigger trait references valid") def check_social_multiplier_keys(self) -> None: data = load_json(self.eco / "fauna" / "social_multipliers.json") if not data: self.fail("social_multipliers.json: file missing or invalid") return social_vals = self.trait_enums.get("social", set()) bad = [] for field in ["grazing_multiplier", "typical_group_size"]: if field not in data: continue for k in data[field]: if k not in social_vals: bad.append(f"{field}: unknown social value '{k}'") if bad: for b in bad: self.fail(f"social_multipliers.json: {b}") else: self.ok("social_multipliers.json: all social keys valid") def check_maturity_biome_refs(self) -> None: data = load_json(self.eco / "flora" / "maturity.json") if not data: self.fail("maturity.json: file missing or invalid") return soil = data.get("soil_max_by_biome", {}) bad = [bid for bid in soil if bid not in self.biome_ids] if bad: self.fail(f"maturity.json: soil_max references unknown biomes: {bad}") else: self.ok(f"maturity.json: all {len(soil)} soil_max biome IDs valid") def check_combat_stats_size_keys(self) -> None: data = load_json(self.eco / "fauna" / "combat_stats.json") if not data: self.fail("combat_stats.json: file missing or invalid") return size_vals = self.trait_enums.get("size", set()) bad = [] for field in ["hp_by_size", "atk_size_base", "def_by_size", "armor_by_size"]: if field not in data: continue for k in data[field]: if k not in size_vals: bad.append(f"{field}: unknown size '{k}'") diet_vals = self.trait_enums.get("diet", set()) for k in data.get("atk_diet_multiplier", {}): if k not in diet_vals: bad.append(f"atk_diet_multiplier: unknown diet '{k}'") loco_vals = self.trait_enums.get("locomotion", set()) for k in data.get("movement_by_locomotion", {}): if k not in loco_vals: bad.append(f"movement_by_locomotion: unknown locomotion '{k}'") for rule in data.get("attack_type_rules", []): for cat, vals in rule.get("match", {}).items(): if cat in self.trait_enums: for v in vals: if v not in self.trait_enums[cat]: bad.append(f"attack_type_rules: unknown {cat} value '{v}'") if bad: for b in bad: self.fail(f"combat_stats.json: {b}") else: self.ok("combat_stats.json: all enum keys valid") def check_constraint_trait_refs(self) -> None: data = load_json(self.eco / "traits" / "trait_constraints.json") if not data: self.fail("trait_constraints.json: file missing or invalid") return bad = [] for pair in data.get("invalid_pairs", []): for key in ["a", "b"]: val = pair.get(key, "") if val not in self.all_trait_values: bad.append(f"invalid_pairs: unknown trait value '{val}'") if bad: for b in bad: self.fail(f"trait_constraints.json: {b}") else: self.ok(f"trait_constraints.json: all {len(data.get('invalid_pairs', []))} constraint refs valid") def check_grazing_by_size(self) -> None: data = load_json(self.eco / "fauna" / "land.json") if not data: self.fail("land.json: file missing or invalid") return size_vals = self.trait_enums.get("size", set()) grazing = data.get("grazing_by_size", {}) bad = [k for k in grazing if k not in size_vals] if bad: self.fail(f"land.json: grazing_by_size has unknown sizes: {bad}") else: self.ok(f"land.json: grazing_by_size keys match size enum") def check_tier_thresholds(self) -> None: for fname, key_prefix in [ ("ecosystem/health.json", "t"), ("ecosystem/tier_classification.json", ""), ]: data = load_json(self.eco / fname) if not data: self.fail(f"{fname}: file missing or invalid") continue thresholds = data.get("tier_thresholds", {}) expected_keys = {f"{key_prefix}{i}" for i in range(2, 11)} actual_keys = set(thresholds.keys()) if actual_keys != expected_keys: missing = expected_keys - actual_keys extra = actual_keys - expected_keys msg = [] if missing: msg.append(f"missing: {sorted(missing)}") if extra: msg.append(f"extra: {sorted(extra)}") self.fail(f"{fname}: tier_thresholds keys: {', '.join(msg)}") continue ordered_keys = [f"{key_prefix}{i}" for i in range(2, 11)] values = [thresholds[k] for k in ordered_keys] monotonic = all(a < b for a, b in zip(values, values[1:])) if not monotonic: self.fail(f"{fname}: tier_thresholds not monotonically increasing: {values}") else: self.ok(f"{fname}: tier_thresholds monotonically increasing T2-T10") def run(self) -> int: print("Loading reference data...") self.load_biome_ids() self.load_trait_enums() print(f" Found {len(self.biome_ids)} biome IDs, {len(self.trait_enums)} trait categories\n") if not self.biome_ids or not self.trait_enums: print("Cannot proceed without biome IDs and trait definitions.") return 1 print("Running checks...") self.check_biome_trait_weight_ids() self.check_biome_trait_weight_values() self.check_biome_trait_weight_sums() self.check_quality_ranges() self.check_food_yield_keys() self.check_ability_ids_unique() self.check_ability_trait_refs() self.check_behavior_trait_refs() self.check_social_multiplier_keys() self.check_maturity_biome_refs() self.check_combat_stats_size_keys() self.check_constraint_trait_refs() self.check_grazing_by_size() self.check_tier_thresholds() total = self.passed + self.failed print(f"\nRESULT: {self.passed}/{total} checks passed", end="") if self.failed: print(f", {self.failed} failed") else: print() return 0 if self.failed == 0 else 1 def main() -> None: parser = argparse.ArgumentParser(description="Validate ecology data cross-references") parser.add_argument("--root", type=Path, default=None, help="Project root (default: auto-detect from script location)") args = parser.parse_args() if args.root: root = args.root.resolve() else: root = Path(__file__).resolve().parent.parent eco_dir = root / "public" / "resources" / "ecology" if not eco_dir.is_dir(): print(f"Error: ecology data directory not found at {eco_dir}") sys.exit(1) print(f"Validating ecology data in {eco_dir}\n") validator = EcologyValidator(root) sys.exit(validator.run()) if __name__ == "__main__": main()