magicciv/tools/validate-schemas.py
2026-04-07 17:52:04 -07:00

460 lines
19 KiB
Python
Executable file
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""Validate all resource JSON files against their co-located JSON Schemas.

Discovers every .schema.json under public/resources/, finds data files in the
same directory (or in its species/ subdirectory when one exists, as for
ecology), and validates each one.
Also runs structural checks that JSON Schema alone can't enforce.

Usage:
    python tools/validate-schemas.py [--root /path/to/project] [--category ecology/fauna] [--verbose]
    python tools/validate-schemas.py --check schema   # JSON Schema validation only
    python tools/validate-schemas.py --check fields   # validate required lineage / tier fields
    python tools/validate-schemas.py --check traits   # validate trait tag completeness
    python tools/validate-schemas.py --check refs     # validate cross-references (prey, evolved_from, flora_dependencies)
    python tools/validate-schemas.py --check trophic  # validate trophic pyramid balance
    python tools/validate-schemas.py --check all      # run everything (default)

Exit code 0 = all pass, 1 = failures found.
"""
import argparse
import json
import sys
from collections import Counter, defaultdict
from pathlib import Path
try:
from jsonschema import Draft202012Validator, RefResolver, ValidationError
HAS_JSONSCHEMA = True
except ImportError:
HAS_JSONSCHEMA = False
def load_json(path: Path) -> dict | list | None:
try:
return json.loads(path.read_text())
except json.JSONDecodeError as e:
return None
class SchemaValidator:
def __init__(self, root: Path, verbose: bool = False):
self.root = root
self.resources = root / "public" / "resources"
self.verbose = verbose
self.passed = 0
self.failed = 0
self.skipped = 0
self.errors: list[str] = []
def error(self, msg: str):
self.errors.append(msg)
self.failed += 1
def ok(self, msg: str = ""):
self.passed += 1
if self.verbose and msg:
print(f"{msg}")
# ── Schema discovery ──────────────────────────────────────────────
def find_schemas(self, category_filter: str | None = None) -> list[tuple[Path, Path]]:
"""Find (schema_path, data_dir) pairs. data_dir contains the files to validate."""
pairs = []
for schema_path in sorted(self.resources.rglob("*.schema.json")):
rel = schema_path.relative_to(self.resources)
if category_filter and category_filter not in str(rel):
continue
data_dir = schema_path.parent
# For ecology schemas, species data is in a species/ subdirectory
species_dir = data_dir / "species"
if species_dir.is_dir():
pairs.append((schema_path, species_dir))
else:
pairs.append((schema_path, data_dir))
return pairs
# ── JSON Schema validation ────────────────────────────────────────
def validate_against_schema(self, schema_path: Path, data_dir: Path):
if not HAS_JSONSCHEMA:
print("⚠ jsonschema not installed — skipping schema validation (pip install jsonschema)")
return
schema = load_json(schema_path)
if schema is None:
self.error(f"PARSE ERROR in schema: {schema_path}")
return
# Build a resolver that can follow $ref to base.schema.json and sibling schemas.
# Register schemas by $id, by resolved file path, AND by file:// URI so
# relative $refs (../base.schema.json, ../../base.schema.json) resolve correctly.
store = {}
for ref_schema_path in self.resources.rglob("*.schema.json"):
ref_schema = load_json(ref_schema_path)
if not ref_schema:
continue
resolved_path = str(ref_schema_path.resolve())
if "$id" in ref_schema:
store[ref_schema["$id"]] = ref_schema
store[resolved_path] = ref_schema
store[f"file://{resolved_path}"] = ref_schema
def file_handler(uri: str):
"""Resolve file:// URIs by checking store, disk, and fallback to resources/."""
path_str = uri.replace("file://", "").replace("file:", "")
p = Path(path_str)
resolved = str(p.resolve())
# Check store by resolved path or file URI
for key in [resolved, f"file://{resolved}", uri]:
if key in store:
return store[key]
# Check disk
if p.exists():
data = load_json(p)
if data:
store[resolved] = data
return data
# Fallback: schema $refs may resolve incorrectly due to trailing slashes.
# Try finding the filename in resources/ directory tree.
filename = p.name
for candidate in self.resources.rglob(filename):
data = load_json(candidate)
if data:
store[resolved] = data
return data
raise FileNotFoundError(f"Schema not found: {uri}")
resolver = RefResolver(
base_uri=f"file://{schema_path.parent.resolve()}/",
referrer=schema,
store=store,
handlers={"file": file_handler},
)
# Determine which $def to validate species files against
# Most schemas use oneOf with multiple variants; species files match fauna_species/flora_species
species_def = None
if "oneOf" in schema:
for ref in schema["oneOf"]:
ref_path = ref.get("$ref", "")
if "species" in ref_path:
species_def = ref_path
break
category = schema_path.stem.replace(".schema", "")
data_files = sorted(data_dir.glob("*.json"))
data_files = [f for f in data_files if not f.name.endswith(".schema.json")]
if not data_files:
if self.verbose:
print(f" (no data files in {data_dir.relative_to(self.root)})")
return
for data_file in data_files:
data = load_json(data_file)
if data is None:
self.error(f"PARSE ERROR: {data_file.relative_to(self.root)}")
continue
# If we identified a species $def and the file has "id" + "name" (species format),
# validate against that specific $def instead of the oneOf root
if species_def and isinstance(data, dict) and "id" in data and "name" in data:
try:
resolved = resolver.resolve(species_def)
sub_schema = resolved[1]
validator = Draft202012Validator(sub_schema, resolver=resolver)
errs = list(validator.iter_errors(data))
if errs:
for e in errs[:3]:
path = ".".join(str(p) for p in e.absolute_path)
self.error(f"SCHEMA: {data_file.name}: {path}: {e.message}")
else:
self.ok(data_file.name)
except Exception as e:
# Fall back to root schema validation
try:
validator = Draft202012Validator(schema, resolver=resolver)
errs = list(validator.iter_errors(data))
if errs:
for err in errs[:3]:
self.error(f"SCHEMA: {data_file.name}: {err.message}")
else:
self.ok(data_file.name)
except Exception as e2:
self.error(f"VALIDATOR ERROR: {data_file.name}: {e2}")
else:
try:
validator = Draft202012Validator(schema, resolver=resolver)
errs = list(validator.iter_errors(data))
if errs:
for e in errs[:3]:
self.error(f"SCHEMA: {data_file.name}: {e.message}")
else:
self.ok(data_file.name)
except Exception as e:
self.error(f"VALIDATOR ERROR: {data_file.name}: {e}")
# ── Structural checks ─────────────────────────────────────────────
def check_traits(self):
"""Verify all fauna species have the 7 required trait categories."""
print("\n── Trait completeness ──")
required_prefixes = ["size_", "diet_", "habitat_", "locomotion_", "thermal_", "repro_", "social_"]
species_dir = self.resources / "ecology" / "fauna" / "species"
for f in sorted(species_dir.glob("*.json")):
data = load_json(f)
if data is None or "traits" not in data:
continue
traits = data["traits"]
for prefix in required_prefixes:
if not any(t.startswith(prefix) for t in traits):
self.error(f"TRAIT: {f.name}: missing {prefix}* trait")
if all(any(t.startswith(p) for t in traits) for p in required_prefixes):
self.ok(f.name)
def check_refs(self):
"""Verify prey, evolved_from, flora_dependencies reference existing species."""
print("\n── Cross-reference integrity ──")
fauna_dir = self.resources / "ecology" / "fauna" / "species"
flora_dir = self.resources / "ecology" / "flora" / "species"
fauna_ids = set()
flora_ids = set()
all_species: dict[str, dict] = {}
for f in fauna_dir.glob("*.json"):
data = load_json(f)
if data and "id" in data:
fauna_ids.add(data["id"])
all_species[data["id"]] = data
for f in flora_dir.glob("*.json"):
data = load_json(f)
if data and "id" in data:
flora_ids.add(data["id"])
all_ids = fauna_ids | flora_ids
broken_prey = []
broken_evolved = []
broken_flora_deps = []
for sid, sp in all_species.items():
for prey_id in sp.get("prey", []):
if prey_id not in all_ids:
broken_prey.append(f"{sid}{prey_id}")
evolved = sp.get("evolved_from")
if evolved and evolved not in fauna_ids:
broken_evolved.append(f"{sid}{evolved}")
for dep in sp.get("flora_dependencies", []):
if dep not in flora_ids:
broken_flora_deps.append(f"{sid}{dep}")
if broken_prey:
for ref in broken_prey:
self.error(f"PREY REF: {ref} (target does not exist)")
else:
self.ok(f"All prey references valid ({len(all_species)} species checked)")
if broken_evolved:
for ref in broken_evolved:
self.error(f"EVOLVED_FROM REF: {ref} (target does not exist)")
else:
self.ok(f"All evolved_from references valid")
if broken_flora_deps:
for ref in broken_flora_deps:
self.error(f"FLORA_DEP REF: {ref} (target does not exist)")
else:
self.ok(f"All flora_dependencies references valid")
def check_trophic(self):
"""Verify trophic pyramid and lineage structure."""
print("\n── Trophic & lineage structure ──")
fauna_dir = self.resources / "ecology" / "fauna" / "species"
trophic = Counter()
lineage_tiers: dict[str, set[int]] = defaultdict(set)
biome_trophic: dict[str, Counter] = defaultdict(Counter)
domain_count = Counter()
lineage_count = Counter()
for f in sorted(fauna_dir.glob("*.json")):
data = load_json(f)
if data is None or "id" not in data:
continue
tl = data.get("trophic_level", "unknown")
trophic[tl] += 1
domain_count[data.get("domain", "unknown")] += 1
lin = data.get("lineage", "")
tier = data.get("ecology_tier", 0)
if lin:
lineage_tiers[lin].add(tier)
lineage_count[lin] += 1
for biome in data.get("biomes", []):
biome_trophic[biome][tl] += 1
# Trophic pyramid. Real ecosystems have more herbivores than predators by a wide
# margin, but for a game roster we accept predators ≤ 1.15× (herbivores + omnivores)
# since omnivores partially fill the prey-base role. Strictly inverted pyramids
# (more pure predators than pure herbivores + omnivores combined) indicate a real
# data gap.
herb = trophic.get("herbivore", 0)
pred = trophic.get("predator", 0) + trophic.get("apex_predator", 0)
omni = trophic.get("omnivore", 0)
total = sum(trophic.values())
prey_base = herb + omni
ratio = pred / prey_base if prey_base > 0 else float('inf')
print(f" Trophic: herbivore={herb} omnivore={omni} predator={pred} total={total} ratio={ratio:.2f}")
if ratio > 1.15:
self.error(
f"TROPHIC: predators ({pred}) exceed 1.15× prey base ({prey_base}), "
f"ratio={ratio:.2f} — inverted pyramid"
)
else:
self.ok(f"Trophic pyramid: predator ratio {ratio:.2f} ≤ 1.15")
# Lineages missing T1. Some lineages legitimately start above T1:
# - pinnipeds: seals/walruses are transitional marine mammals, inherently specialized;
# no meaningful T1 "primitive seal" exists that isn't already a felid/mustelid
t1_exempt_lineages = {"pinnipeds"}
missing_t1 = [l for l, tiers in lineage_tiers.items()
if 1 not in tiers and l != "fantasy" and l not in t1_exempt_lineages]
if missing_t1:
for l in sorted(missing_t1):
tiers = sorted(lineage_tiers[l])
self.error(f"LINEAGE: {l} has no T1 root (tiers: {tiers})")
else:
self.ok(f"All {len(lineage_tiers)} lineages have T1 roots")
# Lineages with tier gaps > 2
for l, tiers in sorted(lineage_tiers.items()):
sorted_tiers = sorted(t for t in tiers if t > 0)
if len(sorted_tiers) < 2:
continue
for i in range(len(sorted_tiers) - 1):
gap = sorted_tiers[i + 1] - sorted_tiers[i]
if gap > 2:
self.error(f"TIER GAP: {l} has T{sorted_tiers[i]}→T{sorted_tiers[i+1]} (gap={gap})")
# Biomes with predators but no herbivores.
# Extreme biomes (volcanic, ice, deep) legitimately lack herbivores —
# their food chains run through detritivores/chemosynthesis/filter feeders.
extreme_biomes = {
"lava_field", "volcanic", "volcanic_plains", "ice", "sea_ice",
"hadal_zone", "abyssal_plain", "ancient_lakebed", "rocky_waste",
"basalt_highland", "canyon", "coastal_cliffs", "cliffs",
}
broken_biomes = []
for biome, counts in sorted(biome_trophic.items()):
if biome in extreme_biomes:
continue
has_pred = counts.get("predator", 0) + counts.get("apex_predator", 0) > 0
has_herb = counts.get("herbivore", 0) + counts.get("omnivore", 0) > 0
if has_pred and not has_herb:
broken_biomes.append(biome)
if broken_biomes:
for b in broken_biomes:
self.error(f"BIOME CHAIN: {b} has predators but no herbivores/omnivores")
else:
self.ok("All biomes with predators have prey species")
# Domain balance
print(f" Domain: {dict(domain_count)}")
def check_lineage_ecology_tier(self):
"""Verify all fauna have lineage + ecology_tier, all flora have lineage + quality_tier."""
print("\n── Required fields ──")
fauna_dir = self.resources / "ecology" / "fauna" / "species"
flora_dir = self.resources / "ecology" / "flora" / "species"
for f in sorted(fauna_dir.glob("*.json")):
data = load_json(f)
if data is None or "id" not in data:
continue
if not data.get("lineage"):
self.error(f"FIELD: {f.name}: missing lineage")
if not data.get("ecology_tier"):
self.error(f"FIELD: {f.name}: missing ecology_tier")
for f in sorted(flora_dir.glob("*.json")):
data = load_json(f)
if data is None or "id" not in data:
continue
if not data.get("lineage"):
self.error(f"FIELD: {f.name}: missing lineage")
if not data.get("quality_tier"):
self.error(f"FIELD: {f.name}: missing quality_tier")
# ── Runner ────────────────────────────────────────────────────────
def run_schema_validation(self, category: str | None = None):
print("── JSON Schema validation ──")
pairs = self.find_schemas(category)
for schema_path, data_dir in pairs:
rel_schema = schema_path.relative_to(self.root)
rel_data = data_dir.relative_to(self.root)
n_files = len(list(data_dir.glob("*.json"))) - len(list(data_dir.glob("*.schema.json")))
if n_files == 0:
continue
print(f"\n {rel_schema}{rel_data} ({n_files} files)")
self.validate_against_schema(schema_path, data_dir)
def run_all(self, category: str | None = None):
self.run_schema_validation(category)
self.check_lineage_ecology_tier()
self.check_traits()
self.check_refs()
self.check_trophic()
def report(self) -> int:
print(f"\n{'' * 60}")
print(f" PASSED: {self.passed} FAILED: {self.failed}")
if self.errors:
print(f"\n Failures:")
for e in self.errors[:50]:
print(f"{e}")
if len(self.errors) > 50:
print(f" ... and {len(self.errors) - 50} more")
print(f"{'' * 60}")
return 1 if self.failed > 0 else 0
def main():
    """Parse the command line, run the selected checks, and exit.

    The process exits with the code returned by ``SchemaValidator.report()``:
    0 when every check passed, 1 when any failure was recorded.
    """
    cli = argparse.ArgumentParser(description="Validate resource JSON against schemas")
    cli.add_argument(
        "--root",
        type=Path,
        default=Path(__file__).parent.parent,
        help="Project root directory",
    )
    cli.add_argument(
        "--category",
        type=str,
        default=None,
        help="Filter to schema category (e.g. ecology/fauna)",
    )
    cli.add_argument(
        "--check",
        type=str,
        default="all",
        choices=["all", "schema", "traits", "refs", "trophic", "fields"],
        help="Which checks to run",
    )
    cli.add_argument("--verbose", action="store_true", help="Show individual pass results")
    options = cli.parse_args()

    validator = SchemaValidator(options.root, verbose=options.verbose)

    # One entry per --check choice; only the schema-based runs take --category.
    runners = {
        "all": lambda: validator.run_all(options.category),
        "schema": lambda: validator.run_schema_validation(options.category),
        "traits": validator.check_traits,
        "refs": validator.check_refs,
        "trophic": validator.check_trophic,
        "fields": validator.check_lineage_ecology_tier,
    }
    selected = runners.get(options.check)
    if selected is not None:
        selected()

    sys.exit(validator.report())


if __name__ == "__main__":
    main()