magicciv/tools/matchup-metrics-report.py

#!/usr/bin/env python3
"""matchup-metrics-report.py — multi-metric per-pair report for matchup-grid batches.

Computes per-perspective medians for: tier_peak, peak_unit_tier, kills, units_lost,
gold_peak, total_combats, techs, cities. Helps determine whether the personality scorer
drives differentiation on metrics other than the (often-converging) tier_peak ceiling.

Usage:
    python3 tools/matchup-metrics-report.py <batch-dir>
"""
from __future__ import annotations

import json
import statistics
import sys
from pathlib import Path


def _dict_or_empty(value: object) -> dict:
    """Return *value* if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _value_or_zero(record: dict, key: str) -> object:
    """Return ``record[key]``, substituting 0 when it is missing or ``None``."""
    value = record.get(key)
    return 0 if value is None else value


def collect_games(sub: Path) -> list[dict]:
    """Collect the final-turn metrics of every game found under *sub*.

    Each ``game_*`` subdirectory of *sub* is expected to hold a
    ``turn_stats.jsonl`` file with one JSON record per line; only the last
    record is used. Directories are visited in sorted name order.

    A game is skipped (best effort, no error raised) when its stats file is
    missing or unreadable, contains a line that is not valid JSON, has no
    non-blank lines, or ends in a record that is not a JSON object.

    Args:
        sub: Directory containing the ``game_*`` subdirectories.

    Returns:
        One dict per usable game with the keys ``outcome``,
        ``winner_personality``, ``tier_peak``, ``peak_unit_tier``, ``kills``,
        ``units_lost``, ``gold_peak``, ``total_combats``, ``techs`` and
        ``cities``. Metrics that are missing or ``null`` are reported as 0 so
        that downstream median computations never see ``None``.
    """
    games: list[dict] = []
    for d in sorted(sub.iterdir()):
        if not d.is_dir() or not d.name.startswith("game_"):
            continue
        stats_file = d / "turn_stats.jsonl"
        try:
            # JSON text is UTF-8; do not depend on the locale's default codec.
            text = stats_file.read_text(encoding="utf-8")
            records = [
                json.loads(line) for line in text.splitlines() if line.strip()
            ]
        except (OSError, ValueError):
            # OSError: missing/unreadable file. ValueError: malformed JSON
            # (json.JSONDecodeError) or undecodable bytes (UnicodeDecodeError).
            continue
        if not records:
            continue
        final = records[-1]
        if not isinstance(final, dict):
            # A syntactically valid but non-object last line carries no stats.
            continue
        player_stats = _dict_or_empty(final.get("player_stats"))
        aggregate = _dict_or_empty(final.get("aggregate"))
        # Slot 0 in matchup is the perspective player
        p0 = _dict_or_empty(player_stats.get("0"))
        games.append({
            "outcome": final.get("outcome"),
            "winner_personality": final.get("winner_personality"),
            "tier_peak": _value_or_zero(p0, "tier_peak"),
            "peak_unit_tier": _value_or_zero(p0, "peak_unit_tier"),
            "kills": _value_or_zero(p0, "kills"),
            "units_lost": _value_or_zero(p0, "units_lost"),
            "gold_peak": _value_or_zero(p0, "gold_peak"),
            "total_combats": _value_or_zero(aggregate, "total_combats"),
            "techs": _value_or_zero(p0, "techs"),
            "cities": _value_or_zero(p0, "cities"),
        })
    return games


def stats(values: list[float]) -> str:
    """Render the central value of *values* as a whole-number string.

    Args:
        values: Numeric samples; may be empty.

    Returns:
        ``"-"`` when there are no samples, the sole sample when there is
        exactly one, and the median otherwise, each formatted with ``.0f``.
    """
    if not values:
        return "-"
    center = values[0] if len(values) == 1 else statistics.median(values)
    return f"{center:.0f}"


def _summarize_perspective(pair: str, perspective: str, games: list[dict]) -> dict:
    """Build one report row (counts and per-metric medians) for a perspective."""

    def median_of(key: str) -> float:
        return statistics.median([g[key] for g in games])

    return {
        "pair": pair,
        "perspective": perspective,
        "n": len(games),
        "victories": sum(1 for g in games if g["outcome"] == "victory"),
        "perspective_wins": sum(
            1 for g in games if g["winner_personality"] == perspective
        ),
        "med_tier_peak": median_of("tier_peak"),
        "med_unit": median_of("peak_unit_tier"),
        "med_kills": median_of("kills"),
        "med_lost": median_of("units_lost"),
        "med_gold": median_of("gold_peak"),
        "med_combats": median_of("total_combats"),
        "med_techs": median_of("techs"),
        "med_cities": median_of("cities"),
    }


def _print_summary_table(rows: list[dict]) -> None:
    """Print the fixed-width per-perspective table (medians truncated to int)."""
    hdrs = ["pair", "as", "n", "vic", "wins", "tp", "unit", "kills", "lost", "gold", "comb", "techs", "cit"]
    widths = [32, 12, 3, 3, 4, 3, 4, 6, 5, 7, 6, 6, 4]
    print("  ".join(f"{h:<{w}}" for h, w in zip(hdrs, widths)))
    print("-" * (sum(widths) + 2 * (len(widths) - 1)))
    for r in rows:
        cells = [
            r["pair"], r["perspective"], r["n"],
            r["victories"], r["perspective_wins"],
            int(r["med_tier_peak"]), int(r["med_unit"]),
            int(r["med_kills"]), int(r["med_lost"]),
            int(r["med_gold"]), int(r["med_combats"]),
            int(r["med_techs"]), int(r["med_cities"]),
        ]
        print("  ".join(f"{str(c):<{w}}" for c, w in zip(cells, widths)))


def _print_pair_deltas(rows: list[dict]) -> None:
    """Print, per pair, the relative gap between its two perspectives' metrics."""
    print()
    print("=== Per-pair perspective deltas (% relative to max, 0% means convergence) ===")
    pair_data: dict[str, dict[str, dict]] = {}
    for r in rows:
        pair_data.setdefault(r["pair"], {})[r["perspective"]] = r
    print(f"{'pair':<32} {'metric':<12} {'as_a→':<8} {'as_b→':<8} {'delta%':<6}")
    for pair, perspectives in sorted(pair_data.items()):
        # partition (not split + unpack) so a directory name containing
        # "_vs_" more than once is skipped below instead of raising ValueError.
        a, _, b = pair.partition("_vs_")
        ra, rb = perspectives.get(a), perspectives.get(b)
        if not ra or not rb:
            # Both perspectives are needed to compute a delta.
            continue
        for metric in ("med_tier_peak", "med_unit", "med_kills", "med_combats", "perspective_wins"):
            va, vb = ra[metric], rb[metric]
            # Normalise by the larger value; fall back to 1 to avoid dividing by zero.
            base = max(va, vb) if max(va, vb) > 0 else 1
            delta = abs(va - vb) / base * 100
            print(f"{pair:<32} {metric:<12} {va:<8.1f} {vb:<8.1f} {delta:<5.1f}")
        print()


def main(argv: list[str]) -> int:
    """Print the multi-metric matchup report for a batch directory.

    The batch directory is scanned for ``*_vs_*`` pair directories, each of
    which may contain ``as_<perspective>`` subdirectories holding the games
    played from that perspective. For every perspective with at least one
    usable game a summary row is printed, followed by per-pair deltas between
    the two perspectives named in the pair directory.

    Args:
        argv: Command-line vector: program name followed by the batch
            directory path.

    Returns:
        Process exit status: 0 on success, 1 when no completed games were
        found, 2 on a usage error or when the path is not a directory.
    """
    if len(argv) != 2:
        # argv may be empty when main() is called programmatically.
        prog = argv[0] if argv else "matchup-metrics-report.py"
        print(f"Usage: {prog} <batch-dir>", file=sys.stderr)
        return 2
    batch = Path(argv[1])
    if not batch.is_dir():
        print(f"Not a directory: {batch}", file=sys.stderr)
        return 2

    rows: list[dict] = []
    for pair_dir in sorted(batch.iterdir()):
        if not pair_dir.is_dir() or "_vs_" not in pair_dir.name:
            continue
        for sub in sorted(pair_dir.iterdir()):
            if not sub.is_dir() or not sub.name.startswith("as_"):
                continue
            perspective = sub.name[len("as_"):]
            games = collect_games(sub)
            if not games:
                continue
            rows.append(_summarize_perspective(pair_dir.name, perspective, games))

    if not rows:
        print("No completed games found.")
        return 1

    _print_summary_table(rows)
    _print_pair_deltas(rows)
    return 0


# Script entry point: run the report and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv))