magicciv/tools/sprite-license-audit.py
2026-06-03 05:46:13 -07:00

195 lines
5.9 KiB
Python
Executable file

#!/usr/bin/env python3
"""Audit sprite ledger vs on-disk PNGs.

Verifies for `public/games/age-of-dwarves/assets/sprites/`:

  1. Every `*.png` under the tree has exactly one row in `LICENSES.md`.
  2. Every row in `LICENSES.md` points at an existing file on disk.
  3. Every row's SHA256 matches the on-disk file's SHA256.
  4. Every row's `license` value is in the whitelist documented at the
     top of `LICENSES.md`.

Exit 0 on success, 1 on any mismatch. Output is human-readable and
suitable for CI.

This is the p2-28 ship gate; see
`.project/objectives/p2-28-sprite-provenance-ledger.md`.

Usage:
    python3 tools/sprite-license-audit.py [--sprites-root <path>]
"""
from __future__ import annotations
import argparse
import hashlib
import re
import sys
from pathlib import Path
# Sprite tree audited when no --sprites-root is given. This is a relative
# path, so it resolves against the current working directory.
DEFAULT_SPRITES_ROOT = Path("public/games/age-of-dwarves/assets/sprites")

# File name of the ledger, looked up directly inside the sprites root.
LEDGER_FILENAME = "LICENSES.md"

# License-column values accepted verbatim (must match the whitelist section
# of LICENSES.md).
LICENSE_WHITELIST = {"cc0-1.0", "cc-by-3.0", "cc-by-4.0", "cc-by-sa-4.0", "mit"}

# License-column values accepted by prefix: `commissioned-commercial` and
# `model-commercial:<name>`. These are kept apart from the exact-match set
# because the model-commercial form carries the model name after the colon.
LICENSE_PREFIX_WHITELIST = ("commissioned-commercial", "model-commercial:")
def sha256_of(path: Path) -> str:
    """Return the lowercase hexadecimal SHA-256 digest of the file at *path*.

    The file is opened in binary mode and read in 64 KiB pieces, so the
    whole file is never held in memory at once.
    """
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            block = stream.read(65536)
            if not block:
                # An empty read means end of file.
                break
            digest.update(block)
    return digest.hexdigest()
def parse_ledger(ledger_path: Path) -> list[dict]:
    """Return the asset rows listed under the ledger's ``## Assets`` heading.

    Expected table format:

        | Path | Source | License | Author | URL | SHA256 | Added |
        |---|---|---|---|---|---|---|
        | foo.png | ai-generated | model-commercial:juggernaut-xl-v9 | ... |

    Only pipe-delimited lines with at least seven cells are considered;
    anything else inside the section is ignored. The header row (``Path``
    in column 1), the separator row (dashes, optionally with ``:``
    alignment markers) and ``*(...`` placeholder rows are skipped.

    Scanning starts after the first line beginning with ``## Assets`` and
    stops at the next level-1 or level-2 heading. Deeper headings such as
    ``### Units`` inside the section do not end it.

    Args:
        ledger_path: Path of the Markdown ledger file (read as UTF-8).

    Returns:
        One dict per asset row with the string keys ``path``, ``source``,
        ``license``, ``author``, ``url``, ``sha256`` (lower-cased) and
        ``added``. Cells beyond the seventh are ignored.

    Raises:
        FileNotFoundError: If *ledger_path* does not exist.
    """
    if not ledger_path.exists():
        raise FileNotFoundError(f"missing ledger: {ledger_path}")

    columns = ("path", "source", "license", "author", "url", "sha256", "added")
    separator_chars = set("-: ")
    rows: list[dict] = []
    in_assets_section = False

    # Decode explicitly as UTF-8: the platform default encoding differs
    # between machines and would mis-read non-ASCII ledger text.
    for line in ledger_path.read_text(encoding="utf-8").splitlines():
        stripped = line.strip()
        if stripped.startswith("## Assets"):
            in_assets_section = True
            continue
        if not in_assets_section:
            continue
        if stripped and stripped.split(None, 1)[0] in ("#", "##"):
            # A heading of the same or a higher level closes the section.
            break
        if not stripped.startswith("|"):
            continue

        # Split on pipes after dropping the outer pipes of the table row.
        cells = [cell.strip() for cell in stripped.strip("|").split("|")]
        if len(cells) < len(columns):
            continue
        if cells[0].lower() == "path":
            # Table header row.
            continue
        if all(set(cell) <= separator_chars for cell in cells):
            # Separator row such as |---|---| or |:---|---:|.
            continue
        if cells[0].startswith("*("):
            # Placeholder (e.g. "*(empty — no sprites shipped yet ...)*").
            continue

        record = dict(zip(columns, cells))  # zip drops any extra cells
        record["sha256"] = record["sha256"].lower()
        rows.append(record)
    return rows
def license_ok(value: str) -> bool:
    """Tell whether *value* is an accepted ledger license identifier.

    A value is accepted when it is an exact member of ``LICENSE_WHITELIST``
    or when it begins with one of the ``LICENSE_PREFIX_WHITELIST`` entries.
    The comparison is case-sensitive.

    NOTE(review): prefix matching also accepts values such as
    ``commissioned-commercial-x`` or a bare ``model-commercial:`` with no
    model name. Confirm that this looseness is intended.
    """
    # str.startswith accepts a tuple and is true if any prefix matches.
    return value in LICENSE_WHITELIST or value.startswith(
        tuple(LICENSE_PREFIX_WHITELIST)
    )
def find_pngs(root: Path) -> list[Path]:
    """Return every PNG file under *root* (recursively), sorted.

    The ``.png`` extension is matched case-insensitively. A plain
    ``rglob("*.png")`` is case-sensitive on POSIX but not on Windows, so
    ``FOO.PNG`` would otherwise be audited on one platform and silently
    skipped on the other. Directories whose name happens to end in
    ``.png`` are excluded because only regular files can be hashed.

    Args:
        root: Directory to search.

    Returns:
        Sorted list of paths (each prefixed with *root*) of the PNG files.
    """
    return sorted(
        candidate
        for candidate in root.rglob("*")
        if candidate.name.lower().endswith(".png") and candidate.is_file()
    )
def audit(sprites_root: Path) -> int:
    """Cross-check the ledger inside *sprites_root* against the PNGs on disk.

    Checks performed:
      1. every PNG found on disk has a ledger row, and no ledger path is
         listed twice;
      2. every ledger row points at an existing regular file;
      3. the row's SHA256 equals the digest of that file;
      4. the row's license passes ``license_ok`` (checked for every row,
         including rows whose file is missing).

    A missing sprites root or ledger is reported on stderr. The summary and
    the list of mismatches are printed on stdout.

    Args:
        sprites_root: Directory holding the sprites and the ledger file.

    Returns:
        0 when everything is consistent, 1 otherwise.
    """
    if not sprites_root.exists():
        print(f"FAIL: sprites root not found: {sprites_root}", file=sys.stderr)
        return 1
    ledger_path = sprites_root / LEDGER_FILENAME
    try:
        rows = parse_ledger(ledger_path)
    except FileNotFoundError as e:
        print(f"FAIL: {e}", file=sys.stderr)
        return 1

    # Compare with forward-slash relative paths. str(Path) would produce
    # backslashes on Windows, which never match ledger paths written with "/".
    on_disk_rel = {
        p.relative_to(sprites_root).as_posix() for p in find_pngs(sprites_root)
    }

    errors: list[str] = []
    rows_by_path: dict[str, dict] = {}
    for row in rows:
        if row["path"] in rows_by_path:
            errors.append(f"duplicate ledger row for {row['path']!r}")
        # The last row for a path wins; the duplicate itself is reported above.
        rows_by_path[row["path"]] = row

    # 1. every PNG → exactly one row
    errors.extend(
        f"PNG on disk has no ledger row: {rel}"
        for rel in sorted(on_disk_rel)
        if rel not in rows_by_path
    )

    # 2./3. every row → existing file with matching SHA256
    # 4.    every row → license in whitelist
    for path, row in sorted(rows_by_path.items()):
        full = sprites_root / path
        if not full.is_file():
            # is_file() rather than exists(): a directory cannot be hashed.
            errors.append(f"ledger row points at missing file: {path}")
        else:
            actual = sha256_of(full)
            if actual != row["sha256"]:
                errors.append(
                    f"sha256 mismatch for {path}: "
                    f"ledger={row['sha256']} actual={actual}"
                )
        # Validate the license even when the file is missing, so one run
        # reports every problem with the row.
        if not license_ok(row["license"]):
            errors.append(
                f"license not in whitelist for {path}: {row['license']!r}"
            )

    # Report
    print(f"sprites root: {sprites_root}")
    print(f"on disk: {len(on_disk_rel)} PNG file(s)")
    print(f"ledger rows: {len(rows_by_path)}")
    if not errors:
        print("OK — sprite ledger and on-disk tree are consistent.")
        return 0
    print("")
    print(f"FAIL — {len(errors)} mismatch(es):")
    for e in errors:
        print(f" - {e}")
    return 1
def main(argv: list[str] | None = None) -> int:
    """Parse command-line arguments and run the audit.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv[1:]``.

    Returns:
        The exit status produced by ``audit`` (0 on success, 1 on failure).
    """
    # __doc__ is None when Python runs with -OO (docstrings stripped), so
    # fall back to no description instead of raising AttributeError.
    doc_lines = (__doc__ or "").strip().splitlines()
    parser = argparse.ArgumentParser(
        description=doc_lines[0] if doc_lines else None
    )
    parser.add_argument(
        "--sprites-root",
        type=Path,
        default=DEFAULT_SPRITES_ROOT,
        help=f"Sprites root directory (default: {DEFAULT_SPRITES_ROOT})",
    )
    args = parser.parse_args(argv)
    return audit(args.sprites_root)
# Script entry point: the integer returned by main() becomes the process
# exit status.
if __name__ == "__main__":
    raise SystemExit(main())