460 lines
19 KiB
Python
Executable file
460 lines
19 KiB
Python
Executable file
#!/usr/bin/env python3
|
||
"""Validate all resource JSON files against their co-located JSON Schemas.
|
||
|
||
Discovers every .schema.json under public/resources/ and validates the data
files next to it. When the schema's directory has a species/ subdirectory (as
for ecology), the files in species/ are validated instead.
Also runs structural checks that JSON Schema alone can't enforce.

Usage:
    python tools/validate-schemas.py [--root /path/to/project] [--category ecology/fauna] [--verbose]
    python tools/validate-schemas.py --check schema   # JSON Schema validation only
    python tools/validate-schemas.py --check fields   # validate required lineage/tier fields
    python tools/validate-schemas.py --check traits   # validate trait tag completeness
    python tools/validate-schemas.py --check refs     # validate cross-references (prey, evolved_from, flora_dependencies)
    python tools/validate-schemas.py --check trophic  # validate trophic pyramid balance
    python tools/validate-schemas.py --check all      # run everything (default)
|
||
|
||
Exit code 0 = all pass, 1 = failures found.
|
||
"""
|
||
|
||
import argparse
|
||
import json
|
||
import sys
|
||
from collections import Counter, defaultdict
|
||
from pathlib import Path
|
||
|
||
try:
|
||
from jsonschema import Draft202012Validator, RefResolver, ValidationError
|
||
HAS_JSONSCHEMA = True
|
||
except ImportError:
|
||
HAS_JSONSCHEMA = False
|
||
|
||
|
||
def load_json(path: Path) -> dict | list | None:
|
||
try:
|
||
return json.loads(path.read_text())
|
||
except json.JSONDecodeError as e:
|
||
return None
|
||
|
||
|
||
class SchemaValidator:
|
||
def __init__(self, root: Path, verbose: bool = False):
|
||
self.root = root
|
||
self.resources = root / "public" / "resources"
|
||
self.verbose = verbose
|
||
self.passed = 0
|
||
self.failed = 0
|
||
self.skipped = 0
|
||
self.errors: list[str] = []
|
||
|
||
def error(self, msg: str):
|
||
self.errors.append(msg)
|
||
self.failed += 1
|
||
|
||
def ok(self, msg: str = ""):
|
||
self.passed += 1
|
||
if self.verbose and msg:
|
||
print(f" ✓ {msg}")
|
||
|
||
# ── Schema discovery ──────────────────────────────────────────────
|
||
|
||
def find_schemas(self, category_filter: str | None = None) -> list[tuple[Path, Path]]:
|
||
"""Find (schema_path, data_dir) pairs. data_dir contains the files to validate."""
|
||
pairs = []
|
||
for schema_path in sorted(self.resources.rglob("*.schema.json")):
|
||
rel = schema_path.relative_to(self.resources)
|
||
if category_filter and category_filter not in str(rel):
|
||
continue
|
||
|
||
data_dir = schema_path.parent
|
||
# For ecology schemas, species data is in a species/ subdirectory
|
||
species_dir = data_dir / "species"
|
||
if species_dir.is_dir():
|
||
pairs.append((schema_path, species_dir))
|
||
else:
|
||
pairs.append((schema_path, data_dir))
|
||
return pairs
|
||
|
||
# ── JSON Schema validation ────────────────────────────────────────
|
||
|
||
def validate_against_schema(self, schema_path: Path, data_dir: Path):
|
||
if not HAS_JSONSCHEMA:
|
||
print("⚠ jsonschema not installed — skipping schema validation (pip install jsonschema)")
|
||
return
|
||
|
||
schema = load_json(schema_path)
|
||
if schema is None:
|
||
self.error(f"PARSE ERROR in schema: {schema_path}")
|
||
return
|
||
|
||
# Build a resolver that can follow $ref to base.schema.json and sibling schemas.
|
||
# Register schemas by $id, by resolved file path, AND by file:// URI so
|
||
# relative $refs (../base.schema.json, ../../base.schema.json) resolve correctly.
|
||
store = {}
|
||
for ref_schema_path in self.resources.rglob("*.schema.json"):
|
||
ref_schema = load_json(ref_schema_path)
|
||
if not ref_schema:
|
||
continue
|
||
resolved_path = str(ref_schema_path.resolve())
|
||
if "$id" in ref_schema:
|
||
store[ref_schema["$id"]] = ref_schema
|
||
store[resolved_path] = ref_schema
|
||
store[f"file://{resolved_path}"] = ref_schema
|
||
|
||
def file_handler(uri: str):
|
||
"""Resolve file:// URIs by checking store, disk, and fallback to resources/."""
|
||
path_str = uri.replace("file://", "").replace("file:", "")
|
||
p = Path(path_str)
|
||
resolved = str(p.resolve())
|
||
# Check store by resolved path or file URI
|
||
for key in [resolved, f"file://{resolved}", uri]:
|
||
if key in store:
|
||
return store[key]
|
||
# Check disk
|
||
if p.exists():
|
||
data = load_json(p)
|
||
if data:
|
||
store[resolved] = data
|
||
return data
|
||
# Fallback: schema $refs may resolve incorrectly due to trailing slashes.
|
||
# Try finding the filename in resources/ directory tree.
|
||
filename = p.name
|
||
for candidate in self.resources.rglob(filename):
|
||
data = load_json(candidate)
|
||
if data:
|
||
store[resolved] = data
|
||
return data
|
||
raise FileNotFoundError(f"Schema not found: {uri}")
|
||
|
||
resolver = RefResolver(
|
||
base_uri=f"file://{schema_path.parent.resolve()}/",
|
||
referrer=schema,
|
||
store=store,
|
||
handlers={"file": file_handler},
|
||
)
|
||
|
||
# Determine which $def to validate species files against
|
||
# Most schemas use oneOf with multiple variants; species files match fauna_species/flora_species
|
||
species_def = None
|
||
if "oneOf" in schema:
|
||
for ref in schema["oneOf"]:
|
||
ref_path = ref.get("$ref", "")
|
||
if "species" in ref_path:
|
||
species_def = ref_path
|
||
break
|
||
|
||
category = schema_path.stem.replace(".schema", "")
|
||
data_files = sorted(data_dir.glob("*.json"))
|
||
data_files = [f for f in data_files if not f.name.endswith(".schema.json")]
|
||
|
||
if not data_files:
|
||
if self.verbose:
|
||
print(f" (no data files in {data_dir.relative_to(self.root)})")
|
||
return
|
||
|
||
for data_file in data_files:
|
||
data = load_json(data_file)
|
||
if data is None:
|
||
self.error(f"PARSE ERROR: {data_file.relative_to(self.root)}")
|
||
continue
|
||
|
||
# If we identified a species $def and the file has "id" + "name" (species format),
|
||
# validate against that specific $def instead of the oneOf root
|
||
if species_def and isinstance(data, dict) and "id" in data and "name" in data:
|
||
try:
|
||
resolved = resolver.resolve(species_def)
|
||
sub_schema = resolved[1]
|
||
validator = Draft202012Validator(sub_schema, resolver=resolver)
|
||
errs = list(validator.iter_errors(data))
|
||
if errs:
|
||
for e in errs[:3]:
|
||
path = ".".join(str(p) for p in e.absolute_path)
|
||
self.error(f"SCHEMA: {data_file.name}: {path}: {e.message}")
|
||
else:
|
||
self.ok(data_file.name)
|
||
except Exception as e:
|
||
# Fall back to root schema validation
|
||
try:
|
||
validator = Draft202012Validator(schema, resolver=resolver)
|
||
errs = list(validator.iter_errors(data))
|
||
if errs:
|
||
for err in errs[:3]:
|
||
self.error(f"SCHEMA: {data_file.name}: {err.message}")
|
||
else:
|
||
self.ok(data_file.name)
|
||
except Exception as e2:
|
||
self.error(f"VALIDATOR ERROR: {data_file.name}: {e2}")
|
||
else:
|
||
try:
|
||
validator = Draft202012Validator(schema, resolver=resolver)
|
||
errs = list(validator.iter_errors(data))
|
||
if errs:
|
||
for e in errs[:3]:
|
||
self.error(f"SCHEMA: {data_file.name}: {e.message}")
|
||
else:
|
||
self.ok(data_file.name)
|
||
except Exception as e:
|
||
self.error(f"VALIDATOR ERROR: {data_file.name}: {e}")
|
||
|
||
# ── Structural checks ─────────────────────────────────────────────
|
||
|
||
def check_traits(self):
|
||
"""Verify all fauna species have the 7 required trait categories."""
|
||
print("\n── Trait completeness ──")
|
||
required_prefixes = ["size_", "diet_", "habitat_", "locomotion_", "thermal_", "repro_", "social_"]
|
||
species_dir = self.resources / "ecology" / "fauna" / "species"
|
||
|
||
for f in sorted(species_dir.glob("*.json")):
|
||
data = load_json(f)
|
||
if data is None or "traits" not in data:
|
||
continue
|
||
traits = data["traits"]
|
||
for prefix in required_prefixes:
|
||
if not any(t.startswith(prefix) for t in traits):
|
||
self.error(f"TRAIT: {f.name}: missing {prefix}* trait")
|
||
if all(any(t.startswith(p) for t in traits) for p in required_prefixes):
|
||
self.ok(f.name)
|
||
|
||
def check_refs(self):
|
||
"""Verify prey, evolved_from, flora_dependencies reference existing species."""
|
||
print("\n── Cross-reference integrity ──")
|
||
fauna_dir = self.resources / "ecology" / "fauna" / "species"
|
||
flora_dir = self.resources / "ecology" / "flora" / "species"
|
||
|
||
fauna_ids = set()
|
||
flora_ids = set()
|
||
all_species: dict[str, dict] = {}
|
||
|
||
for f in fauna_dir.glob("*.json"):
|
||
data = load_json(f)
|
||
if data and "id" in data:
|
||
fauna_ids.add(data["id"])
|
||
all_species[data["id"]] = data
|
||
|
||
for f in flora_dir.glob("*.json"):
|
||
data = load_json(f)
|
||
if data and "id" in data:
|
||
flora_ids.add(data["id"])
|
||
|
||
all_ids = fauna_ids | flora_ids
|
||
broken_prey = []
|
||
broken_evolved = []
|
||
broken_flora_deps = []
|
||
|
||
for sid, sp in all_species.items():
|
||
for prey_id in sp.get("prey", []):
|
||
if prey_id not in all_ids:
|
||
broken_prey.append(f"{sid} → {prey_id}")
|
||
|
||
evolved = sp.get("evolved_from")
|
||
if evolved and evolved not in fauna_ids:
|
||
broken_evolved.append(f"{sid} → {evolved}")
|
||
|
||
for dep in sp.get("flora_dependencies", []):
|
||
if dep not in flora_ids:
|
||
broken_flora_deps.append(f"{sid} → {dep}")
|
||
|
||
if broken_prey:
|
||
for ref in broken_prey:
|
||
self.error(f"PREY REF: {ref} (target does not exist)")
|
||
else:
|
||
self.ok(f"All prey references valid ({len(all_species)} species checked)")
|
||
|
||
if broken_evolved:
|
||
for ref in broken_evolved:
|
||
self.error(f"EVOLVED_FROM REF: {ref} (target does not exist)")
|
||
else:
|
||
self.ok(f"All evolved_from references valid")
|
||
|
||
if broken_flora_deps:
|
||
for ref in broken_flora_deps:
|
||
self.error(f"FLORA_DEP REF: {ref} (target does not exist)")
|
||
else:
|
||
self.ok(f"All flora_dependencies references valid")
|
||
|
||
def check_trophic(self):
|
||
"""Verify trophic pyramid and lineage structure."""
|
||
print("\n── Trophic & lineage structure ──")
|
||
fauna_dir = self.resources / "ecology" / "fauna" / "species"
|
||
|
||
trophic = Counter()
|
||
lineage_tiers: dict[str, set[int]] = defaultdict(set)
|
||
biome_trophic: dict[str, Counter] = defaultdict(Counter)
|
||
domain_count = Counter()
|
||
lineage_count = Counter()
|
||
|
||
for f in sorted(fauna_dir.glob("*.json")):
|
||
data = load_json(f)
|
||
if data is None or "id" not in data:
|
||
continue
|
||
|
||
tl = data.get("trophic_level", "unknown")
|
||
trophic[tl] += 1
|
||
domain_count[data.get("domain", "unknown")] += 1
|
||
|
||
lin = data.get("lineage", "")
|
||
tier = data.get("ecology_tier", 0)
|
||
if lin:
|
||
lineage_tiers[lin].add(tier)
|
||
lineage_count[lin] += 1
|
||
|
||
for biome in data.get("biomes", []):
|
||
biome_trophic[biome][tl] += 1
|
||
|
||
# Trophic pyramid. Real ecosystems have more herbivores than predators by a wide
|
||
# margin, but for a game roster we accept predators ≤ 1.15× (herbivores + omnivores)
|
||
# since omnivores partially fill the prey-base role. Strictly inverted pyramids
|
||
# (more pure predators than pure herbivores + omnivores combined) indicate a real
|
||
# data gap.
|
||
herb = trophic.get("herbivore", 0)
|
||
pred = trophic.get("predator", 0) + trophic.get("apex_predator", 0)
|
||
omni = trophic.get("omnivore", 0)
|
||
total = sum(trophic.values())
|
||
prey_base = herb + omni
|
||
ratio = pred / prey_base if prey_base > 0 else float('inf')
|
||
print(f" Trophic: herbivore={herb} omnivore={omni} predator={pred} total={total} ratio={ratio:.2f}")
|
||
if ratio > 1.15:
|
||
self.error(
|
||
f"TROPHIC: predators ({pred}) exceed 1.15× prey base ({prey_base}), "
|
||
f"ratio={ratio:.2f} — inverted pyramid"
|
||
)
|
||
else:
|
||
self.ok(f"Trophic pyramid: predator ratio {ratio:.2f} ≤ 1.15")
|
||
|
||
# Lineages missing T1. Some lineages legitimately start above T1:
|
||
# - pinnipeds: seals/walruses are transitional marine mammals, inherently specialized;
|
||
# no meaningful T1 "primitive seal" exists that isn't already a felid/mustelid
|
||
t1_exempt_lineages = {"pinnipeds"}
|
||
missing_t1 = [l for l, tiers in lineage_tiers.items()
|
||
if 1 not in tiers and l != "fantasy" and l not in t1_exempt_lineages]
|
||
if missing_t1:
|
||
for l in sorted(missing_t1):
|
||
tiers = sorted(lineage_tiers[l])
|
||
self.error(f"LINEAGE: {l} has no T1 root (tiers: {tiers})")
|
||
else:
|
||
self.ok(f"All {len(lineage_tiers)} lineages have T1 roots")
|
||
|
||
# Lineages with tier gaps > 2
|
||
for l, tiers in sorted(lineage_tiers.items()):
|
||
sorted_tiers = sorted(t for t in tiers if t > 0)
|
||
if len(sorted_tiers) < 2:
|
||
continue
|
||
for i in range(len(sorted_tiers) - 1):
|
||
gap = sorted_tiers[i + 1] - sorted_tiers[i]
|
||
if gap > 2:
|
||
self.error(f"TIER GAP: {l} has T{sorted_tiers[i]}→T{sorted_tiers[i+1]} (gap={gap})")
|
||
|
||
# Biomes with predators but no herbivores.
|
||
# Extreme biomes (volcanic, ice, deep) legitimately lack herbivores —
|
||
# their food chains run through detritivores/chemosynthesis/filter feeders.
|
||
extreme_biomes = {
|
||
"lava_field", "volcanic", "volcanic_plains", "ice", "sea_ice",
|
||
"hadal_zone", "abyssal_plain", "ancient_lakebed", "rocky_waste",
|
||
"basalt_highland", "canyon", "coastal_cliffs", "cliffs",
|
||
}
|
||
broken_biomes = []
|
||
for biome, counts in sorted(biome_trophic.items()):
|
||
if biome in extreme_biomes:
|
||
continue
|
||
has_pred = counts.get("predator", 0) + counts.get("apex_predator", 0) > 0
|
||
has_herb = counts.get("herbivore", 0) + counts.get("omnivore", 0) > 0
|
||
if has_pred and not has_herb:
|
||
broken_biomes.append(biome)
|
||
|
||
if broken_biomes:
|
||
for b in broken_biomes:
|
||
self.error(f"BIOME CHAIN: {b} has predators but no herbivores/omnivores")
|
||
else:
|
||
self.ok("All biomes with predators have prey species")
|
||
|
||
# Domain balance
|
||
print(f" Domain: {dict(domain_count)}")
|
||
|
||
def check_lineage_ecology_tier(self):
|
||
"""Verify all fauna have lineage + ecology_tier, all flora have lineage + quality_tier."""
|
||
print("\n── Required fields ──")
|
||
fauna_dir = self.resources / "ecology" / "fauna" / "species"
|
||
flora_dir = self.resources / "ecology" / "flora" / "species"
|
||
|
||
for f in sorted(fauna_dir.glob("*.json")):
|
||
data = load_json(f)
|
||
if data is None or "id" not in data:
|
||
continue
|
||
if not data.get("lineage"):
|
||
self.error(f"FIELD: {f.name}: missing lineage")
|
||
if not data.get("ecology_tier"):
|
||
self.error(f"FIELD: {f.name}: missing ecology_tier")
|
||
|
||
for f in sorted(flora_dir.glob("*.json")):
|
||
data = load_json(f)
|
||
if data is None or "id" not in data:
|
||
continue
|
||
if not data.get("lineage"):
|
||
self.error(f"FIELD: {f.name}: missing lineage")
|
||
if not data.get("quality_tier"):
|
||
self.error(f"FIELD: {f.name}: missing quality_tier")
|
||
|
||
# ── Runner ────────────────────────────────────────────────────────
|
||
|
||
def run_schema_validation(self, category: str | None = None):
|
||
print("── JSON Schema validation ──")
|
||
pairs = self.find_schemas(category)
|
||
for schema_path, data_dir in pairs:
|
||
rel_schema = schema_path.relative_to(self.root)
|
||
rel_data = data_dir.relative_to(self.root)
|
||
n_files = len(list(data_dir.glob("*.json"))) - len(list(data_dir.glob("*.schema.json")))
|
||
if n_files == 0:
|
||
continue
|
||
print(f"\n {rel_schema} → {rel_data} ({n_files} files)")
|
||
self.validate_against_schema(schema_path, data_dir)
|
||
|
||
def run_all(self, category: str | None = None):
|
||
self.run_schema_validation(category)
|
||
self.check_lineage_ecology_tier()
|
||
self.check_traits()
|
||
self.check_refs()
|
||
self.check_trophic()
|
||
|
||
def report(self) -> int:
|
||
print(f"\n{'═' * 60}")
|
||
print(f" PASSED: {self.passed} FAILED: {self.failed}")
|
||
if self.errors:
|
||
print(f"\n Failures:")
|
||
for e in self.errors[:50]:
|
||
print(f" ✗ {e}")
|
||
if len(self.errors) > 50:
|
||
print(f" ... and {len(self.errors) - 50} more")
|
||
print(f"{'═' * 60}")
|
||
return 1 if self.failed > 0 else 0
|
||
|
||
|
||
def main():
    """Parse the command line, run the selected check(s), and exit.

    The process exit status is whatever ``SchemaValidator.report()`` returns.
    """
    cli = argparse.ArgumentParser(description="Validate resource JSON against schemas")
    cli.add_argument(
        "--root",
        type=Path,
        default=Path(__file__).parent.parent,
        help="Project root directory",
    )
    cli.add_argument(
        "--category",
        type=str,
        default=None,
        help="Filter to schema category (e.g. ecology/fauna)",
    )
    cli.add_argument(
        "--check",
        type=str,
        default="all",
        choices=["all", "schema", "traits", "refs", "trophic", "fields"],
        help="Which checks to run",
    )
    cli.add_argument("--verbose", action="store_true", help="Show individual pass results")
    options = cli.parse_args()

    validator = SchemaValidator(options.root, verbose=options.verbose)

    # One entry per --check choice; only "all" and "schema" use --category.
    runners = {
        "all": lambda: validator.run_all(options.category),
        "schema": lambda: validator.run_schema_validation(options.category),
        "traits": validator.check_traits,
        "refs": validator.check_refs,
        "trophic": validator.check_trophic,
        "fields": validator.check_lineage_ecology_tier,
    }
    runners[options.check]()

    sys.exit(validator.report())
|
||
|
||
|
||
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()
|