#!/usr/bin/env python3 """Validate all resource JSON files against their co-located JSON Schemas. Discovers every .schema.json under public/resources/, finds data files in the same directory (and species/ subdirectory for ecology), and validates each one. Also runs structural checks that JSON Schema alone can't enforce. Usage: python tools/validate-schemas.py [--root /path/to/project] [--category ecology/fauna] [--verbose] python tools/validate-schemas.py --check traits # validate trait tag completeness python tools/validate-schemas.py --check refs # validate cross-references (prey, evolved_from) python tools/validate-schemas.py --check trophic # validate trophic pyramid balance python tools/validate-schemas.py --check all # run everything Exit code 0 = all pass, 1 = failures found. """ import argparse import json import sys from collections import Counter, defaultdict from pathlib import Path try: from jsonschema import Draft202012Validator, RefResolver, ValidationError HAS_JSONSCHEMA = True except ImportError: HAS_JSONSCHEMA = False def load_json(path: Path) -> dict | list | None: try: return json.loads(path.read_text()) except json.JSONDecodeError as e: return None class SchemaValidator: def __init__(self, root: Path, verbose: bool = False): self.root = root self.resources = root / "public" / "resources" self.verbose = verbose self.passed = 0 self.failed = 0 self.skipped = 0 self.errors: list[str] = [] def error(self, msg: str): self.errors.append(msg) self.failed += 1 def ok(self, msg: str = ""): self.passed += 1 if self.verbose and msg: print(f" ✓ {msg}") # ── Schema discovery ────────────────────────────────────────────── def find_schemas(self, category_filter: str | None = None) -> list[tuple[Path, Path]]: """Find (schema_path, data_dir) pairs. data_dir contains the files to validate.""" pairs = [] for schema_path in sorted(self.resources.rglob("*.schema.json")): rel = schema_path.relative_to(self.resources) if category_filter and category_filter not in str(rel): continue data_dir = schema_path.parent # For ecology schemas, species data is in a species/ subdirectory species_dir = data_dir / "species" if species_dir.is_dir(): pairs.append((schema_path, species_dir)) else: pairs.append((schema_path, data_dir)) return pairs # ── JSON Schema validation ──────────────────────────────────────── def validate_against_schema(self, schema_path: Path, data_dir: Path): if not HAS_JSONSCHEMA: print("⚠ jsonschema not installed — skipping schema validation (pip install jsonschema)") return schema = load_json(schema_path) if schema is None: self.error(f"PARSE ERROR in schema: {schema_path}") return # Build a resolver that can follow $ref to base.schema.json and sibling schemas. # Register schemas by $id, by resolved file path, AND by file:// URI so # relative $refs (../base.schema.json, ../../base.schema.json) resolve correctly. store = {} for ref_schema_path in self.resources.rglob("*.schema.json"): ref_schema = load_json(ref_schema_path) if not ref_schema: continue resolved_path = str(ref_schema_path.resolve()) if "$id" in ref_schema: store[ref_schema["$id"]] = ref_schema store[resolved_path] = ref_schema store[f"file://{resolved_path}"] = ref_schema def file_handler(uri: str): """Resolve file:// URIs by checking store, disk, and fallback to resources/.""" path_str = uri.replace("file://", "").replace("file:", "") p = Path(path_str) resolved = str(p.resolve()) # Check store by resolved path or file URI for key in [resolved, f"file://{resolved}", uri]: if key in store: return store[key] # Check disk if p.exists(): data = load_json(p) if data: store[resolved] = data return data # Fallback: schema $refs may resolve incorrectly due to trailing slashes. # Try finding the filename in resources/ directory tree. filename = p.name for candidate in self.resources.rglob(filename): data = load_json(candidate) if data: store[resolved] = data return data raise FileNotFoundError(f"Schema not found: {uri}") resolver = RefResolver( base_uri=f"file://{schema_path.parent.resolve()}/", referrer=schema, store=store, handlers={"file": file_handler}, ) # Determine which $def to validate species files against # Most schemas use oneOf with multiple variants; species files match fauna_species/flora_species species_def = None if "oneOf" in schema: for ref in schema["oneOf"]: ref_path = ref.get("$ref", "") if "species" in ref_path: species_def = ref_path break category = schema_path.stem.replace(".schema", "") data_files = sorted(data_dir.glob("*.json")) data_files = [f for f in data_files if not f.name.endswith(".schema.json")] if not data_files: if self.verbose: print(f" (no data files in {data_dir.relative_to(self.root)})") return for data_file in data_files: data = load_json(data_file) if data is None: self.error(f"PARSE ERROR: {data_file.relative_to(self.root)}") continue # If we identified a species $def and the file has "id" + "name" (species format), # validate against that specific $def instead of the oneOf root if species_def and isinstance(data, dict) and "id" in data and "name" in data: try: resolved = resolver.resolve(species_def) sub_schema = resolved[1] validator = Draft202012Validator(sub_schema, resolver=resolver) errs = list(validator.iter_errors(data)) if errs: for e in errs[:3]: path = ".".join(str(p) for p in e.absolute_path) self.error(f"SCHEMA: {data_file.name}: {path}: {e.message}") else: self.ok(data_file.name) except Exception as e: # Fall back to root schema validation try: validator = Draft202012Validator(schema, resolver=resolver) errs = list(validator.iter_errors(data)) if errs: for err in errs[:3]: self.error(f"SCHEMA: {data_file.name}: {err.message}") else: self.ok(data_file.name) except Exception as e2: self.error(f"VALIDATOR ERROR: {data_file.name}: {e2}") else: try: validator = Draft202012Validator(schema, resolver=resolver) errs = list(validator.iter_errors(data)) if errs: for e in errs[:3]: self.error(f"SCHEMA: {data_file.name}: {e.message}") else: self.ok(data_file.name) except Exception as e: self.error(f"VALIDATOR ERROR: {data_file.name}: {e}") # ── Structural checks ───────────────────────────────────────────── def check_traits(self): """Verify all fauna species have the 7 required trait categories.""" print("\n── Trait completeness ──") required_prefixes = ["size_", "diet_", "habitat_", "locomotion_", "thermal_", "repro_", "social_"] species_dir = self.resources / "ecology" / "fauna" / "species" for f in sorted(species_dir.glob("*.json")): data = load_json(f) if data is None or "traits" not in data: continue traits = data["traits"] for prefix in required_prefixes: if not any(t.startswith(prefix) for t in traits): self.error(f"TRAIT: {f.name}: missing {prefix}* trait") if all(any(t.startswith(p) for t in traits) for p in required_prefixes): self.ok(f.name) def check_refs(self): """Verify prey, evolved_from, flora_dependencies reference existing species.""" print("\n── Cross-reference integrity ──") fauna_dir = self.resources / "ecology" / "fauna" / "species" flora_dir = self.resources / "ecology" / "flora" / "species" fauna_ids = set() flora_ids = set() all_species: dict[str, dict] = {} for f in fauna_dir.glob("*.json"): data = load_json(f) if data and "id" in data: fauna_ids.add(data["id"]) all_species[data["id"]] = data for f in flora_dir.glob("*.json"): data = load_json(f) if data and "id" in data: flora_ids.add(data["id"]) all_ids = fauna_ids | flora_ids broken_prey = [] broken_evolved = [] broken_flora_deps = [] for sid, sp in all_species.items(): for prey_id in sp.get("prey", []): if prey_id not in all_ids: broken_prey.append(f"{sid} → {prey_id}") evolved = sp.get("evolved_from") if evolved and evolved not in fauna_ids: broken_evolved.append(f"{sid} → {evolved}") for dep in sp.get("flora_dependencies", []): if dep not in flora_ids: broken_flora_deps.append(f"{sid} → {dep}") if broken_prey: for ref in broken_prey: self.error(f"PREY REF: {ref} (target does not exist)") else: self.ok(f"All prey references valid ({len(all_species)} species checked)") if broken_evolved: for ref in broken_evolved: self.error(f"EVOLVED_FROM REF: {ref} (target does not exist)") else: self.ok(f"All evolved_from references valid") if broken_flora_deps: for ref in broken_flora_deps: self.error(f"FLORA_DEP REF: {ref} (target does not exist)") else: self.ok(f"All flora_dependencies references valid") def check_trophic(self): """Verify trophic pyramid and lineage structure.""" print("\n── Trophic & lineage structure ──") fauna_dir = self.resources / "ecology" / "fauna" / "species" trophic = Counter() lineage_tiers: dict[str, set[int]] = defaultdict(set) biome_trophic: dict[str, Counter] = defaultdict(Counter) domain_count = Counter() lineage_count = Counter() for f in sorted(fauna_dir.glob("*.json")): data = load_json(f) if data is None or "id" not in data: continue tl = data.get("trophic_level", "unknown") trophic[tl] += 1 domain_count[data.get("domain", "unknown")] += 1 lin = data.get("lineage", "") tier = data.get("ecology_tier", 0) if lin: lineage_tiers[lin].add(tier) lineage_count[lin] += 1 for biome in data.get("biomes", []): biome_trophic[biome][tl] += 1 # Trophic pyramid. Real ecosystems have more herbivores than predators by a wide # margin, but for a game roster we accept predators ≤ 1.15× (herbivores + omnivores) # since omnivores partially fill the prey-base role. Strictly inverted pyramids # (more pure predators than pure herbivores + omnivores combined) indicate a real # data gap. herb = trophic.get("herbivore", 0) pred = trophic.get("predator", 0) + trophic.get("apex_predator", 0) omni = trophic.get("omnivore", 0) total = sum(trophic.values()) prey_base = herb + omni ratio = pred / prey_base if prey_base > 0 else float('inf') print(f" Trophic: herbivore={herb} omnivore={omni} predator={pred} total={total} ratio={ratio:.2f}") if ratio > 1.15: self.error( f"TROPHIC: predators ({pred}) exceed 1.15× prey base ({prey_base}), " f"ratio={ratio:.2f} — inverted pyramid" ) else: self.ok(f"Trophic pyramid: predator ratio {ratio:.2f} ≤ 1.15") # Lineages missing T1. Some lineages legitimately start above T1: # - pinnipeds: seals/walruses are transitional marine mammals, inherently specialized; # no meaningful T1 "primitive seal" exists that isn't already a felid/mustelid t1_exempt_lineages = {"pinnipeds"} missing_t1 = [l for l, tiers in lineage_tiers.items() if 1 not in tiers and l != "fantasy" and l not in t1_exempt_lineages] if missing_t1: for l in sorted(missing_t1): tiers = sorted(lineage_tiers[l]) self.error(f"LINEAGE: {l} has no T1 root (tiers: {tiers})") else: self.ok(f"All {len(lineage_tiers)} lineages have T1 roots") # Lineages with tier gaps > 2 for l, tiers in sorted(lineage_tiers.items()): sorted_tiers = sorted(t for t in tiers if t > 0) if len(sorted_tiers) < 2: continue for i in range(len(sorted_tiers) - 1): gap = sorted_tiers[i + 1] - sorted_tiers[i] if gap > 2: self.error(f"TIER GAP: {l} has T{sorted_tiers[i]}→T{sorted_tiers[i+1]} (gap={gap})") # Biomes with predators but no herbivores. # Extreme biomes (volcanic, ice, deep) legitimately lack herbivores — # their food chains run through detritivores/chemosynthesis/filter feeders. extreme_biomes = { "lava_field", "volcanic", "volcanic_plains", "ice", "sea_ice", "hadal_zone", "abyssal_plain", "ancient_lakebed", "rocky_waste", "basalt_highland", "canyon", "coastal_cliffs", "cliffs", } broken_biomes = [] for biome, counts in sorted(biome_trophic.items()): if biome in extreme_biomes: continue has_pred = counts.get("predator", 0) + counts.get("apex_predator", 0) > 0 has_herb = counts.get("herbivore", 0) + counts.get("omnivore", 0) > 0 if has_pred and not has_herb: broken_biomes.append(biome) if broken_biomes: for b in broken_biomes: self.error(f"BIOME CHAIN: {b} has predators but no herbivores/omnivores") else: self.ok("All biomes with predators have prey species") # Domain balance print(f" Domain: {dict(domain_count)}") def check_lineage_ecology_tier(self): """Verify all fauna have lineage + ecology_tier, all flora have lineage + quality_tier.""" print("\n── Required fields ──") fauna_dir = self.resources / "ecology" / "fauna" / "species" flora_dir = self.resources / "ecology" / "flora" / "species" for f in sorted(fauna_dir.glob("*.json")): data = load_json(f) if data is None or "id" not in data: continue if not data.get("lineage"): self.error(f"FIELD: {f.name}: missing lineage") if not data.get("ecology_tier"): self.error(f"FIELD: {f.name}: missing ecology_tier") for f in sorted(flora_dir.glob("*.json")): data = load_json(f) if data is None or "id" not in data: continue if not data.get("lineage"): self.error(f"FIELD: {f.name}: missing lineage") if not data.get("quality_tier"): self.error(f"FIELD: {f.name}: missing quality_tier") # ── Runner ──────────────────────────────────────────────────────── def run_schema_validation(self, category: str | None = None): print("── JSON Schema validation ──") pairs = self.find_schemas(category) for schema_path, data_dir in pairs: rel_schema = schema_path.relative_to(self.root) rel_data = data_dir.relative_to(self.root) n_files = len(list(data_dir.glob("*.json"))) - len(list(data_dir.glob("*.schema.json"))) if n_files == 0: continue print(f"\n {rel_schema} → {rel_data} ({n_files} files)") self.validate_against_schema(schema_path, data_dir) def run_all(self, category: str | None = None): self.run_schema_validation(category) self.check_lineage_ecology_tier() self.check_traits() self.check_refs() self.check_trophic() def report(self) -> int: print(f"\n{'═' * 60}") print(f" PASSED: {self.passed} FAILED: {self.failed}") if self.errors: print(f"\n Failures:") for e in self.errors[:50]: print(f" ✗ {e}") if len(self.errors) > 50: print(f" ... and {len(self.errors) - 50} more") print(f"{'═' * 60}") return 1 if self.failed > 0 else 0 def main(): parser = argparse.ArgumentParser(description="Validate resource JSON against schemas") parser.add_argument("--root", type=Path, default=Path(__file__).parent.parent, help="Project root directory") parser.add_argument("--category", type=str, default=None, help="Filter to schema category (e.g. ecology/fauna)") parser.add_argument("--check", type=str, default="all", choices=["all", "schema", "traits", "refs", "trophic", "fields"], help="Which checks to run") parser.add_argument("--verbose", action="store_true", help="Show individual pass results") args = parser.parse_args() v = SchemaValidator(args.root, verbose=args.verbose) if args.check == "all": v.run_all(args.category) elif args.check == "schema": v.run_schema_validation(args.category) elif args.check == "traits": v.check_traits() elif args.check == "refs": v.check_refs() elif args.check == "trophic": v.check_trophic() elif args.check == "fields": v.check_lineage_ecology_tier() sys.exit(v.report()) if __name__ == "__main__": main()