magicciv/tools/matchup-metrics-report.py
Natalie 8e3107b92a feat(@projects/@magic-civilization): update tech-tree and mcts service implementation
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-04-25 22:48:40 -07:00

146 lines
5.4 KiB
Python
Executable file

#!/usr/bin/env python3
"""matchup-metrics-report.py — multi-metric per-pair report for matchup-grid batches.

Computes per-perspective medians for: tier_peak, peak_unit_tier, kills, units_lost,
gold_peak, total_combats, techs, and cities. Helps determine whether the personality
scorer drives differentiation on metrics other than the (often-converging) tier_peak
ceiling.

Usage:
    python3 tools/matchup-metrics-report.py <batch-dir>
"""
from __future__ import annotations
import json
import statistics
import sys
from pathlib import Path
def _as_dict(value: object) -> dict:
    """Return ``value`` if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _metric(record: dict, key: str) -> float:
    """Return ``record[key]`` when it is numeric, otherwise 0.

    A JSON ``null`` (or any non-numeric value) is treated like a missing key so
    that downstream median/int conversions never see ``None``.
    """
    value = record.get(key, 0)
    return value if isinstance(value, (int, float)) else 0


def collect_games(sub: Path) -> list[dict]:
    """Collect the final-turn stats of every game under ``sub``.

    Each ``game_*`` subdirectory is expected to hold a ``turn_stats.jsonl``
    file; only its last non-blank record is used. Games are skipped when the
    file is missing, unreadable, empty, contains invalid JSON on any line, or
    when the last record is not a JSON object.

    Args:
        sub: Directory containing ``game_*`` subdirectories.

    Returns:
        One dict per usable game, in sorted directory order, with the keys
        ``outcome``, ``winner_personality``, ``tier_peak``, ``peak_unit_tier``,
        ``kills``, ``units_lost``, ``gold_peak``, ``total_combats``, ``techs``
        and ``cities``. Numeric fields default to 0 when absent or non-numeric.
    """
    games: list[dict] = []
    for game_dir in sorted(sub.iterdir()):
        if not game_dir.is_dir() or not game_dir.name.startswith("game_"):
            continue
        stats_file = game_dir / "turn_stats.jsonl"
        if not stats_file.exists():
            continue
        try:
            records = [
                json.loads(line)
                for line in stats_file.read_text(encoding="utf-8").splitlines()
                if line.strip()
            ]
        except (OSError, ValueError):
            # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
            continue
        if not records:
            continue
        final = records[-1]
        if not isinstance(final, dict):
            # A non-object record has no stats to read; treat the game as unusable.
            continue
        player_stats = _as_dict(final.get("player_stats"))
        aggregate = _as_dict(final.get("aggregate"))
        # Slot 0 in matchup is the perspective player
        p0 = _as_dict(player_stats.get("0"))
        games.append({
            "outcome": final.get("outcome"),
            "winner_personality": final.get("winner_personality"),
            "tier_peak": _metric(p0, "tier_peak"),
            "peak_unit_tier": _metric(p0, "peak_unit_tier"),
            "kills": _metric(p0, "kills"),
            "units_lost": _metric(p0, "units_lost"),
            "gold_peak": _metric(p0, "gold_peak"),
            "total_combats": _metric(aggregate, "total_combats"),
            "techs": _metric(p0, "techs"),
            "cities": _metric(p0, "cities"),
        })
    return games
def stats(values: list[float]) -> str:
    """Format the central value of ``values`` as a whole-number string.

    Returns ``"-"`` for an empty list, the sole element for a one-item list,
    and the median otherwise; the number is rendered with no decimal places.
    """
    if values:
        centre = values[0] if len(values) == 1 else statistics.median(values)
        return f"{centre:.0f}"
    return "-"
# Separator between summary-table columns; the header rule is sized from it.
_COLUMN_SEP = " "

# Metrics compared between the two perspectives of a pair.
_DELTA_METRICS = (
    "med_tier_peak",
    "med_unit",
    "med_kills",
    "med_combats",
    "perspective_wins",
)


def _median_of(games: list[dict], key: str) -> float:
    """Return the median of ``key`` across ``games``."""
    return statistics.median([g[key] for g in games])


def _summarize_batch(batch: Path) -> list[dict]:
    """Build one summary row per ``<a>_vs_<b>/as_<perspective>`` directory with games."""
    rows: list[dict] = []
    for pair_dir in sorted(batch.iterdir()):
        if not pair_dir.is_dir() or "_vs_" not in pair_dir.name:
            continue
        for sub in sorted(pair_dir.iterdir()):
            if not sub.is_dir() or not sub.name.startswith("as_"):
                continue
            perspective = sub.name[len("as_"):]
            games = collect_games(sub)
            if not games:
                continue
            rows.append({
                "pair": pair_dir.name,
                "perspective": perspective,
                "n": len(games),
                "victories": sum(1 for g in games if g["outcome"] == "victory"),
                "perspective_wins": sum(
                    1 for g in games if g["winner_personality"] == perspective
                ),
                "med_tier_peak": _median_of(games, "tier_peak"),
                "med_unit": _median_of(games, "peak_unit_tier"),
                "med_kills": _median_of(games, "kills"),
                "med_lost": _median_of(games, "units_lost"),
                "med_gold": _median_of(games, "gold_peak"),
                "med_combats": _median_of(games, "total_combats"),
                "med_techs": _median_of(games, "techs"),
                "med_cities": _median_of(games, "cities"),
            })
    return rows


def _print_summary_table(rows: list[dict]) -> None:
    """Print the fixed-width per-perspective summary table."""
    hdrs = ["pair", "as", "n", "vic", "wins", "tp", "unit", "kills", "lost", "gold", "comb", "techs", "cit"]
    widths = [32, 12, 3, 3, 4, 3, 4, 6, 5, 7, 6, 6, 4]
    print(_COLUMN_SEP.join(f"{h:<{w}}" for h, w in zip(hdrs, widths)))
    # Rule length = column widths plus the separators actually used between them.
    print("-" * (sum(widths) + len(_COLUMN_SEP) * (len(widths) - 1)))
    for r in rows:
        cells = [
            r["pair"], r["perspective"], r["n"],
            r["victories"], r["perspective_wins"],
            # Medians of an even number of games may be x.5; int() truncates.
            int(r["med_tier_peak"]), int(r["med_unit"]),
            int(r["med_kills"]), int(r["med_lost"]),
            int(r["med_gold"]), int(r["med_combats"]),
            int(r["med_techs"]), int(r["med_cities"]),
        ]
        print(_COLUMN_SEP.join(f"{str(c):<{w}}" for c, w in zip(cells, widths)))


def _print_pair_deltas(rows: list[dict]) -> None:
    """Print, per pair, the relative gap between its two perspectives for each metric."""
    print()
    print("=== Per-pair perspective deltas (% relative to max, 0% means convergence) ===")
    pair_data: dict[str, dict[str, dict]] = {}
    for r in rows:
        pair_data.setdefault(r["pair"], {})[r["perspective"]] = r
    # Size the metric column to the longest label so every row stays aligned.
    metric_w = max(len(m) for m in _DELTA_METRICS)
    print(f"{'pair':<32} {'metric':<{metric_w}} {'as_a→':<8} {'as_b→':<8} {'delta%':<6}")
    for pair, perspectives in sorted(pair_data.items()):
        # partition() never raises, even if the name contains "_vs_" more than once.
        a, _, b = pair.partition("_vs_")
        ra, rb = perspectives.get(a), perspectives.get(b)
        if not ra or not rb:
            continue
        for metric in _DELTA_METRICS:
            va, vb = ra[metric], rb[metric]
            peak = max(va, vb)
            base = peak if peak > 0 else 1  # avoid dividing by zero when both are 0
            delta = abs(va - vb) / base * 100
            print(f"{pair:<32} {metric:<{metric_w}} {va:<8.1f} {vb:<8.1f} {delta:<6.1f}")
        print()  # blank line between pairs


def main(argv: list[str]) -> int:
    """Print the multi-metric matchup report for a batch directory.

    Args:
        argv: Command-line vector; ``argv[1]`` must be the batch directory
            containing ``<a>_vs_<b>/as_<perspective>/game_*`` subdirectories.

    Returns:
        0 on success, 1 when no games were found, 2 on a usage error or when
        the given path is not a directory.
    """
    if len(argv) != 2:
        print(f"Usage: {argv[0]} <batch-dir>", file=sys.stderr)
        return 2
    batch = Path(argv[1])
    if not batch.is_dir():
        print(f"Not a directory: {batch}", file=sys.stderr)
        return 2
    rows = _summarize_batch(batch)
    if not rows:
        print("No completed games found.")
        return 1
    _print_summary_table(rows)
    _print_pair_deltas(rows)
    return 0
# Script entry point: run the report and use main()'s return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv))