144 lines
4.9 KiB
Python
144 lines
4.9 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
"""matchup-grid-report.py — analyze a matchup-grid batch on the local FS.
|
||
|
|
|
||
|
|
Usage:
|
||
|
|
    python3 tools/matchup-grid-report.py <batch-dir>
|
||
|
|
|
||
|
|
When the batch lives on apricot, run the report there via:
|
||
|
|
ssh apricot 'cd ~/Code/@projects/@magic-civilization && python3 tools/matchup-grid-report.py .local/iter/matchup-grid-<stamp>/'
|
||
|
|
|
||
|
|
Outputs:
|
||
|
|
- per-pair winner-tier-peak medians for both perspectives
|
||
|
|
- per-pair tier_peak delta with PASS/fail at >=10% gate
|
||
|
|
- aggregate (n pairs >=10% delta, median delta, mean delta)
|
||
|
|
- the two named gate pairs from warcouncil.md (ironhold_vs_goldvein, blackhammer_vs_runesmith)
|
||
|
|
"""
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import glob
|
||
|
|
import json
|
||
|
|
import statistics
|
||
|
|
import sys
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
# Matchups singled out by name in warcouncil.md.
# Each entry is (pair directory name, first clan, second clan).
GATE_PAIRS: list[tuple[str, str, str]] = [
    ("ironhold_vs_goldvein", "ironhold", "goldvein"),
    ("blackhammer_vs_runesmith", "blackhammer", "runesmith"),
]

# Minimum tier_peak delta, in percent, for a pair to count as PASS.
GATE_THRESHOLD_PCT: float = 10.0
|
||
|
|
|
||
|
|
|
||
|
|
def _load_records(stats_file: Path) -> list:
    """Return the parsed JSONL records of *stats_file*, or [] if it is unusable.

    A missing file is skipped silently; an unreadable or malformed file is
    skipped with a warning on stderr so that dropped games stay visible.
    """
    try:
        text = stats_file.read_text(encoding="utf-8")
    except FileNotFoundError:
        return []
    except (OSError, ValueError) as exc:  # ValueError covers UnicodeDecodeError
        print(f"warning: skipping {stats_file}: {exc}", file=sys.stderr)
        return []
    try:
        # One bad line invalidates the whole game, e.g. a truncated final record.
        return [json.loads(line) for line in text.splitlines() if line.strip()]
    except ValueError as exc:  # json.JSONDecodeError is a ValueError
        print(f"warning: skipping {stats_file}: {exc}", file=sys.stderr)
        return []


def _tier_peak(entry: object) -> float:
    """Return the numeric ``tier_peak`` of one player entry, or 0 if absent or invalid."""
    value = entry.get("tier_peak", 0) if isinstance(entry, dict) else 0
    return value if isinstance(value, (int, float)) else 0


def collect(batch_dir: Path) -> dict[tuple[str, str], dict]:
    """Scan a batch directory and summarise every (pair, perspective) found.

    Only this layout is considered; anything else is ignored::

        <batch_dir>/<a>_vs_<b>/as_<clan>/<game>/turn_stats.jsonl

    The last non-blank record of each ``turn_stats.jsonl`` is treated as the
    game's final state. Games whose stats file is missing, empty, unreadable,
    malformed, or whose final record is not a JSON object are not counted.

    Args:
        batch_dir: Root directory of the batch.

    Returns:
        Mapping of ``(pair, clan)`` to a dict with:
          - ``"med"``: median of ``"wtps"`` (0 when there are no samples),
          - ``"vics"``: number of games whose final ``outcome`` is ``"victory"``,
          - ``"total"``: number of games counted,
          - ``"wtps"``: per game, the highest ``tier_peak`` among the entries
            of the final record's ``player_stats``.
    """
    results: dict[tuple[str, str], dict] = {}
    for pair_dir in sorted(batch_dir.iterdir()):
        if not pair_dir.is_dir() or "_vs_" not in pair_dir.name:
            continue
        pair = pair_dir.name
        for sub in sorted(pair_dir.iterdir()):
            if not sub.is_dir() or not sub.name.startswith("as_"):
                continue
            clan = sub.name[len("as_"):]
            wtps: list = []
            vics = total = 0
            for game in sorted(sub.iterdir()):
                if not game.is_dir():
                    continue
                stats_file = game / "turn_stats.jsonl"
                records = _load_records(stats_file)
                if not records:
                    continue
                final = records[-1]
                if not isinstance(final, dict):
                    print(
                        f"warning: skipping {stats_file}: final record is not an object",
                        file=sys.stderr,
                    )
                    continue
                total += 1
                if final.get("outcome") == "victory":
                    vics += 1
                player_stats = final.get("player_stats")
                # A game without usable player stats still counts toward
                # total/vics but contributes no tier-peak sample.
                if isinstance(player_stats, dict) and player_stats:
                    wtps.append(max(_tier_peak(entry) for entry in player_stats.values()))
            results[(pair, clan)] = {
                "med": statistics.median(wtps) if wtps else 0,
                "vics": vics,
                "total": total,
                "wtps": wtps,
            }
    return results
|
||
|
|
|
||
|
|
|
||
|
|
def report(results: dict[tuple[str, str], dict]) -> int:
|
||
|
|
print(f'{"pair":<35} {"a→med (vic)":<24} {"b→med (vic)":<24} {"delta%":<8} {"verdict":<5}')
|
||
|
|
print("-" * 100)
|
||
|
|
|
||
|
|
seen: set[str] = set()
|
||
|
|
deltas: list[float] = []
|
||
|
|
for (pair, _clan), _data in sorted(results.items()):
|
||
|
|
if pair in seen:
|
||
|
|
continue
|
||
|
|
seen.add(pair)
|
||
|
|
a, b = pair.split("_vs_")
|
||
|
|
da, db = results.get((pair, a)), results.get((pair, b))
|
||
|
|
if not da or not db:
|
||
|
|
print(f"{pair:<35} {'(missing perspective)':<58}")
|
||
|
|
continue
|
||
|
|
ma, mb = da["med"], db["med"]
|
||
|
|
base = max(ma, mb) if max(ma, mb) > 0 else 1
|
||
|
|
delta = abs(ma - mb) / base * 100
|
||
|
|
deltas.append(delta)
|
||
|
|
a_str = f"{a}={ma} ({da['vics']}/{da['total']})"
|
||
|
|
b_str = f"{b}={mb} ({db['vics']}/{db['total']})"
|
||
|
|
verdict = "PASS" if delta >= GATE_THRESHOLD_PCT else "fail"
|
||
|
|
print(f"{pair:<35} {a_str:<24} {b_str:<24} {delta:<7.1f} {verdict:<5}")
|
||
|
|
|
||
|
|
print()
|
||
|
|
if deltas:
|
||
|
|
passing = sum(1 for d in deltas if d >= GATE_THRESHOLD_PCT)
|
||
|
|
print(f"Aggregate: {passing}/{len(deltas)} pairs >= {GATE_THRESHOLD_PCT:.0f}% delta")
|
||
|
|
print(f"Median delta: {statistics.median(deltas):.1f}%")
|
||
|
|
print(f"Mean delta: {statistics.mean(deltas):.1f}%")
|
||
|
|
else:
|
||
|
|
print("No deltas computed — no completed pairs.")
|
||
|
|
|
||
|
|
print()
|
||
|
|
print(f"=== p0-02 warcouncil-named gate pairs (gate >= {GATE_THRESHOLD_PCT:.0f}%) ===")
|
||
|
|
failures = 0
|
||
|
|
for pair, c1, c2 in GATE_PAIRS:
|
||
|
|
a = results.get((pair, c1), {}).get("med", 0)
|
||
|
|
b = results.get((pair, c2), {}).get("med", 0)
|
||
|
|
if a == 0 and b == 0:
|
||
|
|
print(f"{pair}: NOT YET RUN")
|
||
|
|
failures += 1
|
||
|
|
continue
|
||
|
|
base = max(a, b) if max(a, b) > 0 else 1
|
||
|
|
delta = abs(a - b) / base * 100
|
||
|
|
verdict = "PASS" if delta >= GATE_THRESHOLD_PCT else "FAIL"
|
||
|
|
if verdict == "FAIL":
|
||
|
|
failures += 1
|
||
|
|
print(f"{pair}: {c1}={a} {c2}={b} delta={delta:.1f}% {verdict}")
|
||
|
|
|
||
|
|
return 1 if failures else 0
|
||
|
|
|
||
|
|
|
||
|
|
def main(argv: list[str]) -> int:
    """Run the report for the batch directory named on the command line.

    Args:
        argv: Full argument vector, program name first (as in ``sys.argv``).

    Returns:
        2 on a usage error or when the argument is not a directory;
        otherwise the status returned by ``report``.
    """
    if len(argv) != 2:
        # argv may be empty when main() is called programmatically, so do not
        # assume argv[0] exists while building the usage line.
        prog = argv[0] if argv else "matchup-grid-report.py"
        print(f"Usage: {prog} <batch-dir>", file=sys.stderr)
        return 2
    batch = Path(argv[1])
    if not batch.is_dir():
        print(f"Not a directory: {batch}", file=sys.stderr)
        return 2
    return report(collect(batch))
|
||
|
|
|
||
|
|
|
||
|
|
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main(sys.argv))
|