130 lines
6.1 KiB
Python
Executable file
130 lines
6.1 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""Full 4X checklist verification for a 3-seed autoplay batch.

Reads a batch dir from tools/autoplay-batch.sh and emits a markdown table
of metric | value | target | PASS/FAIL against the STOP-criterion thresholds.

Usage: tools/checklist-report.py <batch_dir>
"""
|
|
from __future__ import annotations
|
|
import json, statistics, sys
|
|
from pathlib import Path
|
|
|
|
|
|
def _jsonl(p: Path) -> list[dict]:
    """Load a JSON Lines file into a list of dict records.

    Parsing is best-effort so that a partially written file still yields the
    records that are usable:

    * a path that is not an existing regular file yields ``[]``;
    * blank lines are ignored;
    * lines that are not valid JSON (e.g. a truncated final line) are skipped;
    * lines whose top-level JSON value is not an object are skipped, so the
      result really is a list of dicts and callers can use ``.get`` safely.

    Args:
        p: Path of the ``.jsonl`` file to read.

    Returns:
        The parsed JSON objects, in file order.
    """
    # is_file() rather than exists(): a directory at this path would otherwise
    # make read_text() raise instead of being treated as "no data".
    if not p.is_file():
        return []

    records: list[dict] = []
    # Decode explicitly as UTF-8 (not the locale default) and replace
    # undecodable bytes; a damaged line then simply fails JSON parsing below.
    for raw in p.read_text(encoding="utf-8", errors="replace").splitlines():
        line = raw.strip()
        if not line:
            continue
        try:
            record = json.loads(line)
        except json.JSONDecodeError:
            # Deliberate best-effort: drop the unparsable line.
            continue
        if isinstance(record, dict):
            records.append(record)
    return records
|
|
|
|
|
|
def _collect(gd: Path) -> dict:
    """Summarise one game directory into the flat metrics used by the report.

    Reads ``turn_stats.jsonl``, ``events.jsonl`` and ``game.log`` under *gd*.
    Missing files count as empty, so every key is always present in the result.

    Args:
        gd: Directory holding the output files of a single game.

    Returns:
        A dict with the keys ``turns``, ``outcome``, ``pop_peak``, ``p0_tiles``,
        ``p0_techs``, ``combats``, ``happy_distinct``, ``imp_events``,
        ``loot_events``, ``gate_events``, ``both_p100``, ``invariants`` and
        ``script_errors``.
    """
    stats = _jsonl(gd / "turn_stats.jsonl")
    events = _jsonl(gd / "events.jsonl")

    # The last turn record carries the end-of-game values.
    final = stats[-1] if stats else {}
    agg = final.get("aggregate", {})
    pstats = final.get("player_stats", {})
    p0_final = pstats.get("0", {})
    p1_final = pstats.get("1", {})

    # Number of events seen per event type.
    ev: dict = {}
    for e in events:
        kind = e.get("type", "")
        ev[kind] = ev.get(kind, 0) + 1

    # Largest number of distinct happiness values either player showed over
    # the recorded turns (a missing value counts as 0).
    happy_distinct = 0
    for pid in ("0", "1"):
        seen = {
            s["player_stats"].get(pid, {}).get("happiness", 0)
            for s in stats
            if "player_stats" in s
        }
        happy_distinct = max(happy_distinct, len(seen))

    def _established(player: dict) -> bool:
        # Threshold checked for each player on turns up to and including 100.
        return player.get("pop", 0) >= 5 and player.get("mil", 0) >= 4

    p0_ok = p1_ok = False
    for s in stats:
        # Stop at the first record past turn 100; later records are not
        # examined (this relies on records being in turn order).
        if s.get("turn", 0) > 100:
            break
        players = s.get("player_stats", {})
        if _established(players.get("0", {})):
            p0_ok = True
        if _established(players.get("1", {})):
            p1_ok = True

    inv = sum(len(s.get("invariant_violations", [])) for s in stats)

    log = gd / "game.log"
    errs = 0
    if log.exists():
        # Decode leniently: a stray non-UTF-8 byte in the log must not abort
        # the whole report when all we need is a substring count.
        text = log.read_text(encoding="utf-8", errors="replace")
        errs = sum(1 for ln in text.splitlines() if "SCRIPT ERROR" in ln)

    return {
        "turns": final.get("turn", 0),
        "outcome": final.get("outcome", "?"),
        "pop_peak": max(p0_final.get("pop_peak", 0), p1_final.get("pop_peak", 0)),
        "p0_tiles": p0_final.get("tiles", 0),
        "p0_techs": p0_final.get("techs", 0),
        "combats": agg.get("total_combats", 0),
        "happy_distinct": happy_distinct,
        "imp_events": ev.get("improvement_built", 0),
        "loot_events": ev.get("loot_dropped", 0),
        "gate_events": ev.get("resource_gate_rejected", 0),
        "both_p100": p0_ok and p1_ok,
        "invariants": inv,
        "script_errors": errs,
    }
|
|
|
|
|
|
def _row(label, value, target, ok) -> str:
    """Render one markdown table row: label, value, target, then PASS or FAIL.

    *ok* is interpreted by truthiness: truthy gives ``PASS``, falsy ``FAIL``.
    """
    verdict = "PASS" if ok else "FAIL"
    return "| {} | {} | {} | {} |".format(label, value, target, verdict)
|
|
|
|
|
|
def main(argv: list[str]) -> int:
    """Build the checklist report for a batch directory and print it.

    Every sub-directory of the batch whose name starts with ``game_`` and ends
    in ``_seed<number>`` is treated as one game. The games are summarised with
    ``_collect`` and the report is written to stdout as markdown: a checklist
    table, a pass/fail tally, and a per-seed detail table.

    Args:
        argv: Full argument vector; ``argv[1]`` is the batch directory.

    Returns:
        Process exit status: ``0`` when no checklist row failed, ``1`` when at
        least one row failed or no games were found, ``2`` on a usage error or
        when the batch path is not a directory.
    """
    if len(argv) != 2:
        print("usage: checklist-report.py <batch_dir>", file=sys.stderr)
        return 2
    batch = Path(argv[1])
    if not batch.is_dir():
        print(f"ERROR: {batch} is not a directory", file=sys.stderr)
        return 2

    # isdecimal() (not isdigit()): isdigit() also accepts characters such as
    # superscript digits, which int() rejects with ValueError.
    games = sorted(
        (int(d.name.rsplit("_seed", 1)[1]), d)
        for d in batch.iterdir()
        if d.is_dir()
        and d.name.startswith("game_")
        and d.name.rsplit("_seed", 1)[-1].isdecimal()
    )
    if not games:
        print(f"ERROR: no games under {batch}", file=sys.stderr)
        return 1

    results = [(s, _collect(d)) for s, d in games]
    n = len(results)

    def med(key):
        """Median of one metric across all games."""
        return statistics.median([r[key] for _, r in results])

    def fmt(x) -> str:
        """Format a median for display without hiding a fractional part.

        With an even number of games the median can be a half value; ":.0f"
        would round it and the Value column could then contradict the
        PASS/FAIL verdict, which is computed on the unrounded number.
        """
        return f"{x:.0f}" if float(x).is_integer() else f"{x:.1f}"

    vics = [r for _, r in results if r["outcome"] == "victory"]
    vic_pct = 100 * len(vics) / n
    # Median time-to-victory is taken over won games only.
    med_ttv = statistics.median([r["turns"] for r in vics]) if vics else 0
    imp_total = sum(r["imp_events"] for _, r in results)
    loot_total = sum(r["loot_events"] for _, r in results)
    gate_total = sum(r["gate_events"] for _, r in results)
    both = sum(1 for _, r in results if r["both_p100"])
    inv = sum(r["invariants"] for _, r in results)
    errs = sum(r["script_errors"] for _, r in results)

    rows = [
        f"# FULL 4X CHECKLIST — batch `{batch.name}`",
        f"\n**Games:** {n} **Seeds:** {[s for s, _ in results]}\n",
        "| Metric | Value | Target | Result |",
        "|---|---|---|---|",
        "| **CORE** | | | |",
        _row("pop_peak median", fmt(med("pop_peak")), ">=8", med("pop_peak") >= 8),
        _row("victories", f"{len(vics)}/{n} ({vic_pct:.0f}%)", "50-80%", 50 <= vic_pct <= 80),
        # With no victories there is no TTV to judge, so this row passes; the
        # "victories" row above already fails in that case.
        _row("median TTV", fmt(med_ttv) if vics else "n/a", "200-350",
             (not vics) or 200 <= med_ttv <= 350),
        _row("median combats", fmt(med("combats")), ">=120", med("combats") >= 120),
        _row("median p0_tiles", fmt(med("p0_tiles")), ">=20", med("p0_tiles") >= 20),
        _row("median p0_techs", fmt(med("p0_techs")), ">=20", med("p0_techs") >= 20),
        "| **SYSTEMS** | | | |",
        _row("strategic resources gate", f"{gate_total} rejections", ">=1", gate_total >= 1),
        _row("luxury happiness varies",
             f"min distinct={min(r['happy_distinct'] for _, r in results)}",
             ">=3 distinct/seed", all(r["happy_distinct"] >= 3 for _, r in results)),
        _row("improvement_built total", imp_total, ">=5", imp_total >= 5),
        _row("loot_dropped total", loot_total, ">=1", loot_total >= 1),
        _row("worker improvements/seed (min)", min(r["imp_events"] for _, r in results),
             ">=5/seed", all(r["imp_events"] >= 5 for _, r in results)),
        "| **QUALITY** | | | |",
        _row("both players pop>=5 mil>=4 by T100", f"{both}/{n} seeds", ">=2 seeds", both >= 2),
        _row("invariant violations", inv, "0", inv == 0),
        _row("SCRIPT ERRORs in logs", errs, "0", errs == 0),
    ]

    # Tally verdicts from the rendered rows; headings and section rows do not
    # end in a verdict cell and are therefore not counted.
    passes = sum(1 for r in rows if r.endswith("PASS |"))
    fails = sum(1 for r in rows if r.endswith("FAIL |"))
    rows.append(f"\n**Pass: {passes} Fail: {fails}**\n")

    rows.append("## Per-seed detail\n")
    rows.append("| Seed | Outcome | Turns | Pop | Combats | Techs | Tiles | Imp | Loot | BothP100 | Inv | Errs |")
    rows.append("|---|---|---|---|---|---|---|---|---|---|---|---|")
    for s, r in results:
        rows.append(
            f"| {s} | {r['outcome']} | {r['turns']} | {r['pop_peak']} | {r['combats']} | "
            f"{r['p0_techs']} | {r['p0_tiles']} | {r['imp_events']} | {r['loot_events']} | "
            f"{r['both_p100']} | {r['invariants']} | {r['script_errors']} |"
        )

    print("\n".join(rows))
    return 0 if fails == 0 else 1
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: main()'s return value becomes the process exit status.
    raise SystemExit(main(sys.argv))
|