194 lines
5.9 KiB
Python
Executable file
194 lines
5.9 KiB
Python
Executable file
#!/usr/bin/env python3
"""Audit sprite ledger vs on-disk PNGs.

Verifies for `public/games/age-of-dwarves/assets/sprites/`:

  1. Every `*.png` under the tree has exactly one row in `LICENSES.md`.
  2. Every row in `LICENSES.md` points at an existing file on disk.
  3. Every row's SHA256 matches the on-disk file's SHA256.
  4. Every row's `license` value is in the whitelist documented at the
     top of `LICENSES.md`.

Exit 0 on success, 1 on any mismatch. Output is human-readable and
suitable for CI.

p2-28 ship gate per `.project/objectives/p2-28-sprite-provenance-ledger.md`.

Usage:
    python3 tools/sprite-license-audit.py [--sprites-root <path>]
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import hashlib
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Name of the provenance ledger, which lives directly inside the sprites root.
LEDGER_FILENAME = "LICENSES.md"

# Sprite tree audited when --sprites-root is not given.  The path is relative,
# so it is resolved against the current working directory.
DEFAULT_SPRITES_ROOT = Path("public/games/age-of-dwarves/assets/sprites")

# License-column values accepted verbatim (must match the whitelist section
# of LICENSES.md).
LICENSE_WHITELIST = {"cc0-1.0", "cc-by-4.0", "cc-by-sa-4.0", "mit"}

# License-column values accepted by prefix rather than by exact match:
# `commissioned-commercial` and `model-commercial:<name>`.  They are kept
# apart from the exact set because the model-commercial value carries the
# model name after the colon.
LICENSE_PREFIX_WHITELIST = ("commissioned-commercial", "model-commercial:")
|
|
|
|
|
|
def sha256_of(path: Path) -> str:
    """Return the hex-encoded SHA-256 digest of the file at *path*.

    The file is opened in binary mode and consumed in 64 KiB pieces, so the
    whole file is never held in memory at once.
    """
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            block = stream.read(1 << 16)
            if not block:
                # read() returns b"" at end of file.
                break
            digest.update(block)
    return digest.hexdigest()
|
|
|
|
|
|
def parse_ledger(ledger_path: Path) -> list[dict]:
    """Return the asset rows listed under the ledger's ``## Assets`` heading.

    Format expected::

        | Path | Source | License | Author | URL | SHA256 | Added |
        |---|---|---|---|---|---|---|
        | foo.png | ai-generated | model-commercial:juggernaut-xl-v9 | ... |

    Only lines between the ``## Assets`` heading and the next ``##`` heading
    are considered.  Lines that are not pipe-delimited rows with at least
    seven cells are ignored, as are the header row (``Path`` in column 1),
    the separator row (dashes, optionally with ``:`` alignment markers) and
    placeholder rows whose first cell starts with ``*(``.

    Args:
        ledger_path: Path to the Markdown ledger file.

    Returns:
        One dict per asset row with the keys ``path``, ``source``,
        ``license``, ``author``, ``url``, ``sha256`` (lower-cased) and
        ``added``.  Cells beyond the seventh are dropped.

    Raises:
        FileNotFoundError: If *ledger_path* does not exist.
    """
    if not ledger_path.exists():
        raise FileNotFoundError(f"missing ledger: {ledger_path}")

    columns = ("path", "source", "license", "author", "url", "sha256", "added")
    # Characters a Markdown separator cell may consist of; ":" covers the
    # alignment forms ":---", "---:" and ":---:".
    separator_chars = {"-", ":", " "}

    rows: list[dict] = []
    in_assets_section = False
    # Decode explicitly as UTF-8: the ledger contains non-ASCII text (for
    # example the em-dash in the placeholder row) and the platform default
    # encoding is locale-dependent.
    for line in ledger_path.read_text(encoding="utf-8").splitlines():
        stripped = line.strip()
        if stripped.startswith("## Assets"):
            in_assets_section = True
            continue
        if not in_assets_section:
            continue
        if stripped.startswith("##"):
            # Next section: the assets table is over.
            break
        if not stripped.startswith("|"):
            continue
        # Split, strip, and drop the empty cells produced by the outer pipes.
        cells = [c.strip() for c in stripped.strip("|").split("|")]
        if len(cells) < len(columns):
            continue
        if cells[0].lower() == "path" or all(
            set(c) <= separator_chars for c in cells
        ):
            continue
        if cells[0].startswith("*("):
            # Placeholder (e.g. "*(empty — no sprites shipped yet ...)*").
            continue
        row = dict(zip(columns, cells))
        # Hex digests are compared case-insensitively by normalising here.
        row["sha256"] = row["sha256"].lower()
        rows.append(row)
    return rows
|
|
|
|
|
|
def license_ok(value: str) -> bool:
    """Report whether *value* is an accepted ``license`` cell.

    A value passes when it is one of the exact entries in
    ``LICENSE_WHITELIST`` or when it begins with any entry of
    ``LICENSE_PREFIX_WHITELIST``.
    """
    # str.startswith accepts a tuple and is true if any entry matches.
    # NOTE(review): this is a pure prefix test, so a bare "model-commercial:"
    # with no model name after the colon also passes — confirm that is wanted.
    accepted_prefixes = tuple(LICENSE_PREFIX_WHITELIST)
    return value in LICENSE_WHITELIST or value.startswith(accepted_prefixes)
|
|
|
|
|
|
def find_pngs(root: Path) -> list[Path]:
    """Return every regular file matching ``*.png`` under *root*, sorted.

    The search is recursive.  Directories whose own name happens to end in
    ``.png`` are excluded from the result (their contents are still
    searched), because callers hash every returned path as a file.

    Args:
        root: Directory to search.

    Returns:
        Sorted list of matching file paths, each prefixed with *root*.
    """
    return sorted(p for p in root.rglob("*.png") if p.is_file())
|
|
|
|
|
|
def audit(sprites_root: Path) -> int:
    """Cross-check the ledger in *sprites_root* against the PNGs on disk.

    Checks performed:

    1. Every PNG found under *sprites_root* has a ledger row, and no path
       appears in more than one row.
    2. Every ledger row points at an existing file.
    3. Every row's SHA256 equals the digest of the file it points at.
    4. Every row's license value passes ``license_ok``.

    A summary is printed to stdout; a missing sprites root or missing ledger
    is reported on stderr instead.

    Args:
        sprites_root: Directory holding the sprites and the ledger file.

    Returns:
        0 when every check passes, 1 otherwise.
    """
    if not sprites_root.exists():
        print(f"FAIL: sprites root not found: {sprites_root}", file=sys.stderr)
        return 1

    ledger_path = sprites_root / LEDGER_FILENAME
    try:
        rows = parse_ledger(ledger_path)
    except FileNotFoundError as e:
        print(f"FAIL: {e}", file=sys.stderr)
        return 1

    on_disk = find_pngs(sprites_root)
    # Compare with forward slashes on every platform; str() would produce
    # backslashes on Windows.  Assumes ledger rows are written with forward
    # slashes — confirm against LICENSES.md.
    on_disk_rel = {p.relative_to(sprites_root).as_posix() for p in on_disk}

    errors: list[str] = []

    # Index rows by path; a repeated path is an error and the last row wins.
    rows_by_path: dict[str, dict] = {}
    for row in rows:
        if row["path"] in rows_by_path:
            errors.append(f"duplicate ledger row for {row['path']!r}")
        rows_by_path[row["path"]] = row

    # 1. every PNG → exactly one row
    for rel in sorted(on_disk_rel):
        if rel not in rows_by_path:
            errors.append(f"PNG on disk has no ledger row: {rel}")

    # 2. every row → existing file
    # 3. every row → matching SHA256
    # 4. every row → license in whitelist
    for path in sorted(rows_by_path):
        row = rows_by_path[path]
        full = sprites_root / path
        # is_file() rather than exists(): a directory cannot be hashed.
        if full.is_file():
            actual = sha256_of(full)
            if actual != row["sha256"]:
                errors.append(
                    f"sha256 mismatch for {path}: "
                    f"ledger={row['sha256']} actual={actual}"
                )
        else:
            errors.append(f"ledger row points at missing file: {path}")
        # The license is validated even when the file is missing, so one run
        # reports every problem with the row.
        if not license_ok(row["license"]):
            errors.append(
                f"license not in whitelist for {path}: {row['license']!r}"
            )

    # Report
    print(f"sprites root: {sprites_root}")
    print(f"on disk: {len(on_disk_rel)} PNG file(s)")
    print(f"ledger rows: {len(rows_by_path)}")
    if not errors:
        print("OK — sprite ledger and on-disk tree are consistent.")
        return 0

    print("")
    print(f"FAIL — {len(errors)} mismatch(es):")
    for e in errors:
        print(f" - {e}")
    return 1
|
|
|
|
|
|
def main(argv: list[str] | None = None) -> int:
    """Parse command-line arguments and run the audit.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        The exit status produced by ``audit``.
    """
    # __doc__ is None when docstrings are stripped (python -OO); fall back to
    # no description instead of raising AttributeError.
    doc_lines = (__doc__ or "").strip().splitlines()
    parser = argparse.ArgumentParser(
        description=doc_lines[0] if doc_lines else None
    )
    parser.add_argument(
        "--sprites-root",
        type=Path,
        default=DEFAULT_SPRITES_ROOT,
        help=f"Sprites root directory (default: {DEFAULT_SPRITES_ROOT})",
    )
    args = parser.parse_args(argv)
    return audit(args.sprites_root)
|
|
|
|
|
|
# Script entry point: the value returned by main() becomes the process exit
# status.
if __name__ == "__main__":
    raise SystemExit(main())
|