#!/usr/bin/env python3
"""Render the audio LICENSES.md file from sources.csv.

`sources.csv` is the single source of truth for audio asset provenance:
where each `.ogg` came from, under what licence, with what edits applied.
This script reads it and writes a clean `LICENSES.md` that no human ever
hand-edits — the renderer is the gate.

Two modes:

* No flag → rewrite `LICENSES.md` from `sources.csv`. Use after adding
  or editing rows in the CSV.
* `--check` → render to a temp buffer and diff against the committed
  `LICENSES.md`. Non-zero diff or any policy violation fails. Used by
  `./run validate` and CI.

License policy enforced (per p2-16 acceptance):

* Any `license` value containing `-SA` (ShareAlike) or `-NC`
  (NonCommercial) is rejected outright. SA would force engine source-
  sharing terms onto bundled audio; NC blocks commercial release.
* `license` values must be one of a small allowlist — see
  `ALLOWED_LICENSE_PATTERNS` below. Rejecting unknown strings prevents
  typos like `CC-BY-3.0-SA-NC` from sneaking through.
* Every CC-BY row MUST have a non-empty `attribution` field — that's
  the credit line the licence requires.
* Every row is expected to point at a real path under
  `public/games/<theme>/assets/audio/`. A missing file is not an error
  here; `tools/audio-validate.py` surfaces it as a warning.

Usage:
    python3 tools/audio-licenses-render.py [--check] [--theme age-of-dwarves]
"""

from __future__ import annotations

import argparse
import csv
import io
import re
import sys
from pathlib import Path

REPO = Path(__file__).resolve().parent.parent
DEFAULT_THEME = "age-of-dwarves"

# Licenses we accept. The `-SA` and `-NC` modifiers are explicitly
# blocked downstream regardless of which family they appear in.
ALLOWED_LICENSE_PATTERNS = [
    re.compile(r"^CC0(-1\.0)?$"),
    re.compile(r"^CC-BY-3\.0$"),
    re.compile(r"^CC-BY-4\.0$"),
    re.compile(r"^Pixabay$"),
    re.compile(r"^Sonniss-GDC-\d{4}$"),
    re.compile(r"^Public-Domain$"),
]

BLOCKED_TOKENS = ("-SA", "-NC")

# Columns every row handed to `validate_rows` must carry.
_REQUIRED_COLUMNS = frozenset(
    {"output_path", "source_url", "license", "attribution", "edits", "added"}
)


def reject_license(license_str: str) -> str | None:
    """Return an error message if the licence is not acceptable, else None.

    The checks run in order: empty value, forbidden `-SA` / `-NC`
    modifier, then membership in `ALLOWED_LICENSE_PATTERNS`.
    """
    if not license_str:
        return "license field is empty"
    for token in BLOCKED_TOKENS:
        if token in license_str:
            return (
                f"license {license_str!r} contains forbidden modifier "
                f"{token!r} — ShareAlike and NonCommercial are blocked"
            )
    # fullmatch, not match: with match() the trailing `$` also accepts a
    # value that ends in a newline (e.g. "CC0\n").
    if any(pat.fullmatch(license_str) for pat in ALLOWED_LICENSE_PATTERNS):
        return None
    return (
        f"license {license_str!r} is not on the allowlist "
        f"(see ALLOWED_LICENSE_PATTERNS in tools/audio-licenses-render.py)"
    )


def requires_attribution(license_str: str) -> bool:
    """Return True when the licence is a CC-BY variant (needs a credit line).

    A missing (`None`) or empty value yields False instead of raising.
    """
    return bool(license_str) and license_str.startswith("CC-BY-")


def _md_cell(value: str | None) -> str:
    """Return *value* made safe for a single Markdown pipe-table cell.

    `None` becomes an empty string, line breaks are folded to spaces and
    `|` is backslash-escaped so the value cannot split the row.
    """
    text = " ".join((value or "").splitlines())
    return text.replace("|", "\\|")


def render(rows: list[dict], theme: str) -> str:
    """Build the full text of `LICENSES.md` for *theme*.

    Args:
        rows: CSV rows as dicts. Each must have `output_path`, `license`
            and `source_url`; `attribution`, `edits` and `added` are
            optional and rendered as "—" / empty when absent.
        theme: Theme id interpolated into the heading and path hints.

    Returns:
        The Markdown document, terminated by a single newline. Table rows
        are sorted by `output_path`.
    """
    out: list[str] = [
        f"# Audio Asset Licenses — {theme}",
        "",
        "**Auto-generated from `sources.csv` by "
        "`tools/audio-licenses-render.py`. Do not edit by hand — "
        "edit the CSV and re-render.**",
        "",
        f"Each row records one `.ogg` shipped under "
        f"`public/games/{theme}/assets/audio/`. Licence policy: CC0 / "
        f"CC-BY 3.0 / CC-BY 4.0 / Pixabay / Sonniss-GDC-YYYY / "
        f"Public-Domain accepted. ShareAlike (`-SA`) and "
        f"NonCommercial (`-NC`) are rejected by the renderer.",
        "",
        f"**Asset count:** {len(rows)} files. (Empty until p2-16 sourcing "
        f"begins.)",
        "",
        "## Assets",
        "",
    ]

    if not rows:
        out.append("*(none yet — drop files into the assets tree and add their")
        out.append("rows to `sources.csv`, then re-run this script)*")
        out.append("")
    else:
        out.append("| Path | License | Source | Attribution | Edits | Added |")
        out.append("|------|---------|--------|-------------|-------|-------|")
        for row in sorted(rows, key=lambda r: r["output_path"]):
            out.append(
                "| `{path}` | {lic} | [link]({src}) | {attr} | {edits} | {added} |"
                .format(
                    path=_md_cell(row["output_path"]),
                    lic=_md_cell(row["license"]),
                    src=_md_cell(row["source_url"]),
                    attr=_md_cell(row.get("attribution")) or "—",
                    edits=_md_cell(row.get("edits")) or "—",
                    added=_md_cell(row.get("added")),
                )
            )
        out.append("")

    out.extend(
        [
            "## Encoding",
            "",
            "All audio normalised to:",
            "",
            "* Ogg Vorbis container, `.ogg` extension",
            "* 44.1 kHz sample rate",
            "* 128 kbps target bitrate",
            "* Stereo (SFX may be mono)",
            "* Peak ~−3 dBFS — per-event `volume_db` in `audio.json` "
            "scales from there",
            "* Music tracks must be seamless loops (except `victory`)",
            "",
            "## How to add a new asset",
            "",
            # Continuation lines below are indented to the list item's
            # content column (3 spaces) so the fenced example stays inside
            # step 4 and the numbering carries on at 5.
            "1. Source it from one of the approved providers (CC0 /",
            "   Pixabay / Sonniss / Freesound CC-BY / OpenGameArt CC-BY).",
            "2. Edit + normalise to the encoding spec above.",
            f"3. Drop it under `public/games/{theme}/assets/audio/...`",
            "4. Append a row to `sources.csv`:",
            "   ```csv",
            "   audio/sfx/units/melee/attack_01.ogg,https://freesound.org/.../123,CC-BY-4.0,Author Name,trim+normalize,2026-04-26",
            "   ```",
            "5. Run `python3 tools/audio-licenses-render.py` to regenerate",
            "   this file, then `python3 tools/audio-validate.py` to",
            "   confirm the manifest still validates.",
        ]
    )
    return "\n".join(out) + "\n"


def load_rows(csv_path: Path) -> list[dict]:
    """Read `sources.csv` into a list of row dicts.

    Lines whose first non-blank character is `#` are dropped before
    parsing, and rows with an empty `output_path` are skipped. Returns an
    empty list when *csv_path* does not exist.
    """
    if not csv_path.exists():
        return []
    # utf-8-sig: a leading BOM (common in spreadsheet exports) would
    # otherwise glue itself onto the first header name, and every row
    # would then be dropped by the `output_path` filter below.
    with csv_path.open("r", encoding="utf-8-sig") as f:
        # Strip leading `#` comment lines; csv.DictReader doesn't skip them.
        cleaned = io.StringIO(
            "".join(line for line in f if not line.lstrip().startswith("#"))
        )
    # restval="": a row with fewer fields than the header gets empty
    # strings rather than None, so validation reports it instead of
    # crashing on a string method.
    reader = csv.DictReader(cleaned, restval="")
    return [r for r in reader if r.get("output_path")]


def validate_rows(rows: list[dict], theme: str) -> list[str]:
    """Check every row against the licence policy.

    Args:
        rows: Row dicts as produced by `load_rows`.
        theme: Theme id. Unused here — kept so the signature stays
            stable; see the note on file existence below.

    Returns:
        Human-readable error strings, one per problem, prefixed with the
        1-based row index. An empty list means the rows are acceptable.
    """
    errors: list[str] = []
    seen_paths: set[str] = set()
    for i, row in enumerate(rows, start=1):
        missing = _REQUIRED_COLUMNS.difference(row)
        if missing:
            errors.append(
                f"row {i}: missing column(s) {sorted(missing)}"
            )
            continue

        path = row["output_path"]
        if path in seen_paths:
            errors.append(f"row {i}: duplicate output_path {path!r}")
        seen_paths.add(path)

        # Values may be None when the caller's rows came from a
        # DictReader without restval; treat that as empty.
        license_str = row["license"] or ""
        attribution = row["attribution"] or ""
        source_url = row["source_url"] or ""

        problem = reject_license(license_str)
        if problem:
            errors.append(f"row {i} ({path}): {problem}")

        # Whitespace alone is not a credit line.
        if requires_attribution(license_str) and not attribution.strip():
            errors.append(
                f"row {i} ({path}): {license_str} requires a non-empty "
                f"attribution"
            )

        if not source_url.startswith(("http://", "https://")):
            errors.append(
                f"row {i} ({path}): source_url must be an http(s) URL"
            )

        # Whether the asset file exists on disk is deliberately not
        # checked: the file may still be incoming. The validator surfaces
        # it as a warning via tools/audio-validate.py.
    return errors


def main() -> int:
    """CLI entry point; returns the process exit code.

    Returns 1 when `sources.csv` is absent, when any row fails
    validation, or (with `--check`) when the committed `LICENSES.md`
    differs from the freshly rendered text. Returns 0 otherwise; without
    `--check` the rendered text is written to `LICENSES.md`.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--check",
        action="store_true",
        help="Diff rendered output against the committed LICENSES.md "
        "and exit non-zero on any drift or policy violation.",
    )
    parser.add_argument(
        "--theme",
        default=DEFAULT_THEME,
        help=f"Theme id under public/games/. Default: {DEFAULT_THEME}",
    )
    args = parser.parse_args()

    asset_dir = REPO / "public" / "games" / args.theme / "assets" / "audio"
    sources_csv = asset_dir / "sources.csv"
    licenses_md = asset_dir / "LICENSES.md"

    if not sources_csv.exists():
        print(f"[{args.theme}] sources.csv not found at {sources_csv}", file=sys.stderr)
        return 1

    rows = load_rows(sources_csv)
    errors = validate_rows(rows, args.theme)
    if errors:
        print(f"[{args.theme}] sources.csv has {len(errors)} error(s):", file=sys.stderr)
        for e in errors:
            print(f" ✗ {e}", file=sys.stderr)
        return 1

    rendered = render(rows, args.theme)

    if args.check:
        # A missing LICENSES.md compares as empty, i.e. as out of date.
        committed = licenses_md.read_text(encoding="utf-8") if licenses_md.exists() else ""
        if committed != rendered:
            print(
                f"[{args.theme}] LICENSES.md is out of date relative to "
                f"sources.csv. Run `python3 tools/audio-licenses-render.py` "
                f"and commit the result.",
                file=sys.stderr,
            )
            return 1
        print(f"[{args.theme}] LICENSES.md is in sync ({len(rows)} rows). OK")
        return 0

    licenses_md.write_text(rendered, encoding="utf-8")
    print(f"[{args.theme}] wrote {licenses_md} ({len(rows)} rows)")
    return 0


if __name__ == "__main__":
    sys.exit(main())