#!/usr/bin/env python3
"""Render the audio LICENSES.md file from sources.csv.

`sources.csv` is the single source of truth for audio asset provenance:
where each `.ogg` came from, under what licence, with what edits applied.
This script reads it and writes a clean `LICENSES.md` that no human ever
hand-edits — the renderer is the gate.

Two modes:

  * No flag → rewrite `LICENSES.md` from `sources.csv`. Use after adding
    or editing rows in the CSV.
  * `--check` → render to a temp buffer and diff against the committed
    `LICENSES.md`. Non-zero diff or any policy violation fails. Used by
    `./run validate` and CI.

License policy enforced (per p2-16 acceptance):

  * Any `license` value containing `-SA` (ShareAlike) or `-NC`
    (NonCommercial) is rejected outright. SA would force engine source-
    sharing terms onto bundled audio; NC blocks commercial release.
  * `license` values must be one of a small allowlist — see
    `ALLOWED_LICENSE_PATTERNS` below. Rejecting unknown strings prevents
    typos like `CC-BY-3.0-SA-NC` from sneaking through.
  * Every CC-BY row MUST have a non-empty `attribution` field — that's
    the credit line the licence requires.
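  * Every row SHOULD point at a real path under
    `public/games/<theme>/assets/audio/`. This script does not fail on a
    missing file (it may still be incoming); `tools/audio-validate.py`
    is expected to surface that as a warning.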

Usage:
    python3 tools/audio-licenses-render.py [--check] [--theme age-of-dwarves]
"""
from __future__ import annotations

import argparse
import csv
import io
import re
import sys
from pathlib import Path

# Repository root, taken as two directory levels above this file.
REPO = Path(__file__).resolve().parents[1]
# Theme used when `--theme` is not given on the command line.
DEFAULT_THEME = "age-of-dwarves"

# Allowlist of licence identifiers, one anchored regex per accepted family.
# A value must match one of these to be accepted; the `-SA` / `-NC`
# modifiers in BLOCKED_TOKENS are refused separately, whatever family they
# are attached to.
ALLOWED_LICENSE_PATTERNS = [
    re.compile(expr)
    for expr in (
        r"^CC0(-1\.0)?$",
        r"^CC-BY-3\.0$",
        r"^CC-BY-4\.0$",
        r"^Pixabay$",
        r"^Sonniss-GDC-\d{4}$",
        r"^Public-Domain$",
    )
]
BLOCKED_TOKENS = ("-SA", "-NC")


def reject_license(license_str: str) -> str | None:
    """Return an error message if the licence is not acceptable, else None.

    Checks run in this order, and the first failure wins:

    1. the value is empty;
    2. the value contains any entry of ``BLOCKED_TOKENS`` as a substring;
    3. the value does not match any pattern in ``ALLOWED_LICENSE_PATTERNS``.

    Args:
        license_str: The raw ``license`` cell of one ``sources.csv`` row.

    Returns:
        A human-readable rejection reason, or ``None`` when the licence is
        accepted.
    """
    if not license_str:
        return "license field is empty"
    for token in BLOCKED_TOKENS:
        if token in license_str:
            return (
                f"license {license_str!r} contains forbidden modifier "
                f"{token!r} — ShareAlike and NonCommercial are blocked"
            )
    # fullmatch(), not match(): `$` also matches just before a trailing
    # newline, so match() would accept a quoted CSV cell such as "CC0\n".
    if any(pat.fullmatch(license_str) for pat in ALLOWED_LICENSE_PATTERNS):
        return None
    return (
        f"license {license_str!r} is not on the allowlist "
        f"(see ALLOWED_LICENSE_PATTERNS in tools/audio-licenses-render.py)"
    )


def requires_attribution(license_str: str) -> bool:
    """Tell whether *license_str* belongs to the CC-BY family.

    The test is purely textual: the value must begin with ``CC-BY-``.

    Args:
        license_str: A ``license`` value from ``sources.csv``.

    Returns:
        ``True`` when the value starts with ``CC-BY-``, else ``False``.
    """
    cc_by_prefix = "CC-BY-"
    is_cc_by = license_str.startswith(cc_by_prefix)
    return is_cc_by


def _md_cell(value: object) -> str:
    """Return *value* as text that is safe inside one Markdown table cell."""
    text = "" if value is None else str(value)
    # A raw line break would end the table row early, so fold it to a space.
    text = " ".join(text.splitlines())
    # An unescaped pipe would start a new cell. Pipes that are already
    # backslash-escaped in the CSV are left alone so they are not doubled.
    return re.sub(r"(?<!\\)\|", r"\\|", text)


def render(rows: list[dict], theme: str) -> str:
    """Build the full text of ``LICENSES.md`` for one theme.

    The document is a fixed preamble, an asset table (or a placeholder
    note when *rows* is empty), and fixed "Encoding" and "How to add a new
    asset" sections. Table rows are sorted by ``output_path``.

    Every cell value is passed through ``_md_cell`` so that a ``|`` or a
    line break inside a CSV field cannot break the table layout. Values
    without those characters are rendered exactly as given.

    Args:
        rows: Row dicts keyed by the ``sources.csv`` column names. Each
            must contain ``output_path``, ``license`` and ``source_url``;
            ``attribution`` and ``edits`` fall back to an em dash when
            empty or absent, ``added`` to an empty cell.
        theme: Theme id, interpolated into the title and the asset paths
            mentioned in the text.

    Returns:
        The rendered Markdown, terminated by a single newline.

    Raises:
        KeyError: If a row lacks ``output_path``, ``license`` or
            ``source_url``.
    """
    out: list[str] = []
    out.append(f"# Audio Asset Licenses — {theme}")
    out.append("")
    out.append(
        "**Auto-generated from `sources.csv` by "
        "`tools/audio-licenses-render.py`. Do not edit by hand — "
        "edit the CSV and re-render.**"
    )
    out.append("")
    out.append(
        f"Each row records one `.ogg` shipped under "
        f"`public/games/{theme}/assets/audio/`. Licence policy: CC0 / "
        f"CC-BY 3.0 / CC-BY 4.0 / Pixabay / Sonniss-GDC-YYYY / "
        f"Public-Domain accepted. ShareAlike (`-SA`) and "
        f"NonCommercial (`-NC`) are rejected by the renderer."
    )
    out.append("")
    out.append(
        f"**Asset count:** {len(rows)} files. (Empty until p2-16 sourcing "
        f"begins.)"
    )
    out.append("")
    out.append("## Assets")
    out.append("")
    if not rows:
        out.append("*(none yet — drop files into the assets tree and add their")
        out.append("rows to `sources.csv`, then re-run this script)*")
        out.append("")
    else:
        out.append("| Path | License | Source | Attribution | Edits | Added |")
        out.append("|------|---------|--------|-------------|-------|-------|")
        for row in sorted(rows, key=lambda r: r["output_path"]):
            # Empty optional cells show an em dash rather than a blank.
            attribution = row.get("attribution") or "—"
            edits = row.get("edits") or "—"
            out.append(
                "| `{path}` | {lic} | [link]({src}) | {attr} | {edits} | {added} |"
                .format(
                    path=_md_cell(row["output_path"]),
                    lic=_md_cell(row["license"]),
                    src=_md_cell(row["source_url"]),
                    attr=_md_cell(attribution),
                    edits=_md_cell(edits),
                    added=_md_cell(row.get("added", "")),
                )
            )
        out.append("")
    out.append("## Encoding")
    out.append("")
    out.append("All audio normalised to:")
    out.append("")
    out.append("* Ogg Vorbis container, `.ogg` extension")
    out.append("* 44.1 kHz sample rate")
    out.append("* 128 kbps target bitrate")
    out.append("* Stereo (SFX may be mono)")
    out.append("* Peak ~−3 dBFS — per-event `volume_db` in `audio.json` "
               "scales from there")
    out.append("* Music tracks must be seamless loops (except `victory`)")
    out.append("")
    out.append("## How to add a new asset")
    out.append("")
    out.append("1. Source it from one of the approved providers (CC0 /")
    out.append("   Pixabay / Sonniss / Freesound CC-BY / OpenGameArt CC-BY).")
    out.append("2. Edit + normalise to the encoding spec above.")
    out.append(f"3. Drop it under `public/games/{theme}/assets/audio/...`")
    out.append("4. Append a row to `sources.csv`:")
    out.append("   ```csv")
    out.append("   audio/sfx/units/melee/attack_01.ogg,https://freesound.org/.../123,CC-BY-4.0,Author Name,trim+normalize,2026-04-26")
    out.append("   ```")
    out.append("5. Run `python3 tools/audio-licenses-render.py` to regenerate")
    out.append("   this file, then `python3 tools/audio-validate.py` to")
    out.append("   confirm the manifest still validates.")
    return "\n".join(out) + "\n"


def load_rows(csv_path: Path) -> list[dict]:
    """Read ``sources.csv`` into a list of row dicts.

    Lines whose first non-blank character is ``#`` are dropped before
    parsing, because ``csv.DictReader`` has no comment support. The first
    remaining line is the header. Rows with an empty ``output_path`` are
    skipped.

    Args:
        csv_path: Location of the CSV file.

    Returns:
        One dict per kept row, keyed by header name. Cells missing from a
        short row are ``""``. Returns ``[]`` when the file does not exist.
    """
    if not csv_path.exists():
        return []
    # utf-8-sig drops a leading byte-order mark if one is present. With
    # plain utf-8 the BOM sticks to the first line: the first header
    # becomes "\ufeffoutput_path" (so every row is filtered out below), or
    # a leading comment line stops being recognised as a comment.
    with csv_path.open("r", encoding="utf-8-sig") as f:
        cleaned = io.StringIO(
            "".join(line for line in f if not line.lstrip().startswith("#"))
        )
    # restval="" gives short rows empty strings instead of None, so
    # downstream string checks and rendering never see None.
    reader = csv.DictReader(cleaned, restval="")
    return [r for r in reader if r.get("output_path")]


def validate_rows(rows: list[dict], theme: str) -> list[str]:
    """Check every row against the sourcing policy and collect the problems.

    Per row, in order: all required columns are present (otherwise the row
    is reported and skipped), ``output_path`` is not a duplicate, the
    licence passes ``reject_license``, licences for which
    ``requires_attribution`` is true carry a non-blank attribution, and
    ``source_url`` is an http(s) URL.

    Whether the asset file exists on disk is deliberately NOT checked
    here: the file may still be incoming.

    Args:
        rows: Row dicts as produced by ``load_rows``.
        theme: Theme id. Currently unused; kept so callers need not change.

    Returns:
        One message per problem, prefixed with the 1-based index of the
        row within *rows*. An empty list means the rows are valid.
    """
    errors: list[str] = []
    seen_paths: set[str] = set()
    required = {"output_path", "source_url", "license", "attribution",
                "edits", "added"}
    for i, row in enumerate(rows, start=1):
        missing = required - set(row.keys())
        if missing:
            errors.append(
                f"row {i}: missing column(s) {sorted(missing)}"
            )
            continue
        # csv.DictReader fills cells absent from a short row with None.
        # Treat those as empty so the checks below report an error instead
        # of raising AttributeError on a str method.
        path = row["output_path"] or ""
        license_str = row["license"] or ""
        attribution = row["attribution"] or ""
        source_url = row["source_url"] or ""
        if path in seen_paths:
            errors.append(f"row {i}: duplicate output_path {path!r}")
        seen_paths.add(path)
        problem = reject_license(license_str)
        if problem:
            errors.append(f"row {i} ({path}): {problem}")
        # A whitespace-only attribution credits nobody, so it counts as
        # empty.
        if requires_attribution(license_str) and not attribution.strip():
            errors.append(
                f"row {i} ({path}): {license_str} requires a non-empty "
                f"attribution"
            )
        if not source_url.startswith(("http://", "https://")):
            errors.append(
                f"row {i} ({path}): source_url must be an http(s) URL"
            )
    return errors


def main() -> int:
    """Command-line entry point: validate the CSV, then write or check.

    Parses ``--check`` and ``--theme``, loads and validates the theme's
    ``sources.csv``, renders the Markdown, and then either writes
    ``LICENSES.md`` (default) or compares the render with the file on
    disk (``--check``).

    Returns:
        Exit status. ``0`` on success; ``1`` when ``sources.csv`` is
        missing, when validation reports errors, or when ``--check`` finds
        that ``LICENSES.md`` differs from a fresh render (a missing
        ``LICENSES.md`` is compared as empty text).
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument(
        "--check",
        action="store_true",
        help=(
            "Diff rendered output against the committed LICENSES.md "
            "and exit non-zero on any drift or policy violation."
        ),
    )
    cli.add_argument(
        "--theme",
        default=DEFAULT_THEME,
        help=f"Theme id under public/games/. Default: {DEFAULT_THEME}",
    )
    opts = cli.parse_args()
    theme = opts.theme

    audio_dir = REPO / "public" / "games" / theme / "assets" / "audio"
    csv_file = audio_dir / "sources.csv"
    md_file = audio_dir / "LICENSES.md"

    # Guard: nothing to do without the source of truth.
    if not csv_file.exists():
        print(f"[{theme}] sources.csv not found at {csv_file}",
              file=sys.stderr)
        return 1

    rows = load_rows(csv_file)
    problems = validate_rows(rows, theme)
    if problems:
        print(f"[{theme}] sources.csv has {len(problems)} error(s):",
              file=sys.stderr)
        for problem in problems:
            print(f"  ✗ {problem}", file=sys.stderr)
        return 1

    fresh = render(rows, theme)

    # Default mode: overwrite the generated file and report.
    if not opts.check:
        md_file.write_text(fresh, encoding="utf-8")
        print(f"[{theme}] wrote {md_file} ({len(rows)} rows)")
        return 0

    # --check mode: compare against what is on disk, never write.
    if md_file.exists():
        on_disk = md_file.read_text(encoding="utf-8")
    else:
        on_disk = ""
    if on_disk == fresh:
        print(f"[{theme}] LICENSES.md is in sync ({len(rows)} rows). OK")
        return 0
    print(
        f"[{theme}] LICENSES.md is out of date relative to "
        f"sources.csv. Run `python3 tools/audio-licenses-render.py` "
        f"and commit the result.",
        file=sys.stderr,
    )
    return 1


# Run the CLI only when this file is executed as a script, not when it is
# imported; the integer returned by main() becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())