#!/usr/bin/env python3
"""p1-clean-baseline.py — D1-literal convergence on a CLEAN (un-juiced) surface.

⚠️ LIMITATION (verified 2026-06-03): the player-api harness this driver uses
does NOT load a TechWeb (mc-player-api/projection.rs: "available techs from the
TechWeb once [surfaced]" — TRACKED, not done). Consequently `research.researched`
is ALWAYS empty and `tier_peak` is ALWAYS 0 on this surface, so the D1 stall
check (tp<=1) is trivially satisfied and MEANINGLESS here. The symmetric
duel-driving logic below is correct and reusable, but a faithful tier_peak
baseline requires either (a) an AUTO_PLAY_ALL_AI mode in the full Godot autoplay
scene (de-juice slot 0; that scene loads tech via TurnManager/GdTechWeb), or
(b) wiring a TechWeb into the harness dispatch + surfacing researched techs in
the projection. Do NOT cite this tool's tier_peak output as a gate result until
one of those lands. Elimination-turn / city-count output IS valid here.


p1-29d gate D1 (operator-ratified literal reading): the trailing AI (slot 1)
must be "eliminated <=T100 OR stalled (alive AND tier_peak<=1)" in 10/10 seeds.

The apricot autoplay batch surface is INVALID for this gate (operator Q2): its
slot 0 is a juiced harness player (`auto_play.gd` rush-buy / attack-commit /
formation helpers "that one clan wins every game"). This driver removes that
confound by running the SAME 2-player duel matchup the gate measures
(meta.json of batch 20260529_185955: players=2, map_size=duel, map_type=pangaea,
victory=domination) but driving BOTH slots through the identical scripted
`suggest()` chain via the player-api harness — i.e. gate-surface-MINUS-the-juice.
Both slots are `scripted:default`; the matchup is symmetric.

tier_peak = max tech-era over slot 1's researched techs (same definition as
turn_processor.gd::_player_tier_peak / auto_play.gd), mapped via the tech JSON.

D1-literal needs only: P1 elimination turn (first turn cities==0 with no
founder) and P1 tier_peak sampled at the last turn <=100. Kills/mil are not
part of the literal gate (the non-factor lens was operator-rejected); recorded
for context only.

Usage:
  tools/p1-clean-baseline.py --seeds 1,2,...,10 --turns 110 \
      [--map-type pangaea] [--map-size duel] [--out runs/clean.json]

stdlib + the rl_self_play harness only (no SB3 / numpy model load).
"""
from __future__ import annotations

import argparse
import glob
import json
import sys
from pathlib import Path
from typing import Any

THIS_DIR = Path(__file__).resolve().parent
REPO_ROOT = THIS_DIR.parent
# Make the repo-root `tooling` package importable when run as a script.
sys.path.insert(0, str(REPO_ROOT))

from tooling.rl_self_play.harness_client import (  # noqa: E402
    HarnessClient,
    HarnessConfig,
)
from tooling.rl_self_play.encoders import encode_legal_actions  # noqa: E402
from tooling.rl_self_play.record_expert import _DropStats, _resolve  # noqa: E402

# Gate horizon: the D1 check is evaluated at (or before) this turn.
T100 = 100
DATA = REPO_ROOT / "public" / "games" / "age-of-dwarves" / "data"


def build_tech_era_map() -> dict[str, int]:
    """Build a ``tech id -> era`` map from the JSON files under ``DATA/techs``.

    Files whose path ends with ``manifest.json`` are skipped, as are files that
    cannot be read or parsed. A file may hold a single tech object or a list of
    them; only dict entries carrying both ``id`` and ``era`` are recorded.

    Returns:
        Mapping of stringified tech id to integer era. Empty when no usable
        tech file is found.
    """
    era: dict[str, int] = {}
    # Sorted so that, should two files define the same id, the winner is
    # deterministic rather than dependent on directory listing order.
    for p in sorted(glob.glob(str(DATA / "techs" / "*.json"))):
        if p.endswith("manifest.json"):
            continue
        try:
            # Context manager closes the handle; explicit UTF-8 avoids
            # depending on the platform's locale encoding.
            with open(p, encoding="utf-8") as fh:
                d = json.load(fh)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            continue
        items = d if isinstance(d, list) else [d]
        for t in items:
            if isinstance(t, dict) and "id" in t and "era" in t:
                era[str(t["id"])] = int(t["era"])
    return era


def _owned(view: dict[str, Any], key: str, slot: int) -> list[dict[str, Any]]:
    """Return the entries of ``view[key]`` whose ``owner`` equals ``slot``.

    Entries without an ``owner`` field are treated as owner ``-1`` and so never
    match a real slot.
    """
    return [x for x in view.get(key, []) if int(x.get("owner", -1)) == slot]


def _is_founder(u: dict[str, Any]) -> bool:
    """Return True if unit ``u`` can (re-)found a city.

    A unit counts as a founder when its ``type`` string contains ``"founder"``
    or its ``can_found_city`` field is truthy.
    """
    return "founder" in str(u.get("type", "")) or bool(u.get("can_found_city"))


def _tier_peak(view: dict[str, Any], era_map: dict[str, int]) -> int:
    """Return the highest era among the techs listed in ``view.research.researched``.

    Tech ids missing from ``era_map`` count as era 0, and the result is never
    below 0. Per the module docstring, this is always 0 on the current harness
    surface because the researched list is always empty.
    """
    researched = view.get("research", {}).get("researched", []) or []
    peak = 0
    for tid in researched:
        e = era_map.get(str(tid), 0)
        if e > peak:
            peak = e
    return peak


def _advance_slot(client: HarnessClient, slot: int) -> None:
    """Apply the scripted suggest() chain for one slot, then end its turn.

    Verbatim port of mine_divergence._advance_slot (the proven scripted path).
    Each suggested action is resolved against the slot's current legal actions;
    unresolvable suggestions are skipped. The first action that raises aborts
    the remainder of the chain (best-effort), after which the turn is ended and
    pending notifications are drained.
    """
    for a in client.suggest(slot=slot):
        # Re-read the view every step: earlier actions change the legal set.
        v = client.view(slot=slot)
        _, i2a = encode_legal_actions(v)
        idx = _resolve(a, v, i2a, _DropStats())
        if idx is None:
            continue
        r = i2a[idx]
        try:
            if r.get("type") == "end_turn":
                client.end_turn(slot=slot)
            else:
                client.act(r, slot=slot)
        except Exception:  # noqa: BLE001 best-effort advance
            break
    try:
        # Unconditional end_turn; deliberately tolerant of failure (for
        # example when the chain above already ended the turn).
        client.end_turn(slot=slot)
    except Exception:  # noqa: BLE001
        pass
    client.drain_notifications()


def run_seed(seed: int, turns: int, map_type: str, map_size: str,
             era_map: dict[str, int]) -> dict[str, Any]:
    """Run one symmetric 2-player domination duel and record slot 1's fate.

    Both slots are advanced through ``_advance_slot`` each loop iteration. The
    loop stops at the first of: slot 1 eliminated (no cities and no founder
    unit), the reported turn exceeding ``T100``, or ``turns`` iterations.

    Args:
        seed: Harness seed for the match.
        turns: Maximum number of loop iterations to drive.
        map_type: Map type passed to the harness config.
        map_size: Map size passed to the harness config.
        era_map: ``tech id -> era`` map used for the tier_peak sample.

    Returns:
        A JSON-serialisable dict with keys ``seed``, ``last_turn``,
        ``elim_turn`` (None if never eliminated), ``p1_alive_at_100``,
        ``snap100`` (the last snapshot taken at a turn <= T100, or None) and
        ``p1_cities_max``.

        NOTE: ``p1_alive_at_100`` is ``elim_turn is None``, i.e. "never
        observed eliminated during the run". It is True for a run that stopped
        before T100, and False when elimination was observed just after T100.
    """
    cfg = HarnessConfig(
        seed=seed, players=2, player_slots=(0, 1),
        map_size=map_size, map_type=map_type, victory_mode="domination",
        timeout_sec=120,
    )
    client = HarnessClient(cfg)
    snap100: dict[str, Any] | None = None
    elim_turn: int | None = None
    last_turn = 0
    p1_cities_seen_max = 0
    try:
        for _ in range(turns):
            v1 = client.view(slot=1)
            cur = int(v1.get("turn", 0))
            last_turn = cur
            cities = _owned(v1, "cities", 1)
            units = _owned(v1, "units", 1)
            founders = [u for u in units if _is_founder(u)]
            p1_cities_seen_max = max(p1_cities_seen_max, len(cities))
            # Elimination: no cities and no founder to re-found.
            if not cities and not founders:
                elim_turn = cur
                break
            if cur <= T100:
                # Overwritten every turn, so this ends up holding the last
                # sample taken at a turn <= T100.
                snap100 = {
                    "turn": cur,
                    "cities": len(cities),
                    "mil": sum(1 for u in units if not _is_founder(u)),
                    "tp": _tier_peak(v1, era_map),
                    # Despite the key name, this reads score.city_count.
                    "pop": int(v1.get("score", {}).get("city_count", 0)),
                }
            # Past T100 and still alive → literal gate fully determined.
            if cur > T100:
                break
            _advance_slot(client, 0)
            _advance_slot(client, 1)
    finally:
        client.shutdown()

    p1_alive = elim_turn is None
    return {
        "seed": seed,
        "last_turn": last_turn,
        "elim_turn": elim_turn,
        "p1_alive_at_100": p1_alive,
        "snap100": snap100,
        "p1_cities_max": p1_cities_seen_max,
    }


def score_convergence(rows: list[dict[str, Any]]) -> None:
    """tier_peak-free D1 convergence verdict (this surface cannot measure
    tier_peak — see LIMITATION banner). D1 still has decisive discriminating
    power without it:

      CONVERGED      — P1 eliminated <= T100.
      NON-CONVERGED  — P1 alive at T100 with >=2 cities (unambiguously a
                       developing co-equal peer; neither dead nor stalled,
                       regardless of era).
      AMBIGUOUS      — P1 alive at T100 with exactly 1 city. Could be a true
                       stall (tp<=1) or a held-down developer; needs the
                       tier_peak surface (AUTO_PLAY_ALL_AI) to resolve.

    The headline question this answers: does convergence happen AT ALL on a
    clean (un-juiced) surface, and how many seeds need the heavier tier_peak
    build to adjudicate.

    Rows that match none of the above (eliminated after T100, or never
    eliminated but holding 0 cities in the snapshot / no snapshot) are counted
    as NON-CONVERGED.

    Args:
        rows: Per-seed result dicts as returned by ``run_seed``.

    Prints a per-seed table and a summary to stdout; returns nothing.
    """
    print("\n=== p1-29d D1 convergence on CLEAN symmetric duel (tier_peak-free) ===")
    print(f"{len(rows)} seeds (both slots scripted:default via suggest; no juice)")
    print("NOTE: tier_peak unavailable on this surface (harness has no TechWeb)\n")
    hdr = ("seed", "lastT", "elimT", "alive@100", "@T", "c@100", "mil@100", "cmax")
    print("{:>4} {:>5} {:>5} {:>9} {:>4} {:>5} {:>7} {:>5}".format(*hdr))
    conv = noncv = ambig = 0
    verdicts = []
    for r in sorted(rows, key=lambda x: x["seed"]):
        s = r["snap100"] or {}
        elim = r["elim_turn"]
        elim_by_100 = (elim is not None and elim <= T100)
        c100 = s.get("cities", 0)
        alive100 = r["p1_alive_at_100"] and c100 >= 1
        if elim_by_100:
            verdict, label = "CONVERGED", "elim<=T100"
            conv += 1
        elif alive100 and c100 >= 2:
            verdict, label = "NON-CONVERGED", f"alive@100, {c100} cities (peer)"
            noncv += 1
        elif alive100 and c100 == 1:
            verdict, label = "AMBIGUOUS", "alive@100, 1 city (needs tier_peak)"
            ambig += 1
        else:
            verdict = "NON-CONVERGED"
            if elim is not None:
                label = f"elim>T100(@{elim})"
            else:
                # Never eliminated but no city in the snapshot (founder only,
                # or no snapshot was taken); do not report a bogus "@None".
                label = "alive, 0 cities@100 (founder only / no snapshot)"
            noncv += 1
        verdicts.append((r["seed"], verdict, label))
        print("{:>4} {:>5} {:>5} {:>9} {:>4} {:>5} {:>7} {:>5}".format(
            r["seed"], str(r["last_turn"]),
            str(elim) if elim is not None else "-",
            "yes" if r["p1_alive_at_100"] else "no",
            str(s.get("turn", "-")), c100, s.get("mil", "-"),
            r["p1_cities_max"]))
    n = len(rows)
    print("\n--- clean convergence shape ---")
    print(f"  CONVERGED     (elim<=T100):       {conv}/{n}")
    print(f"  NON-CONVERGED (peer/late):        {noncv}/{n}")
    print(f"  AMBIGUOUS     (1 city, need tp):  {ambig}/{n}")
    for seed, v, label in verdicts:
        print(f"    s{seed}: {v:<13} ({label})")


def main(argv: list[str]) -> int:
    """Command-line entry point: run every requested seed, print the verdict
    table, and optionally dump the raw per-seed rows as JSON.

    Args:
        argv: Full argument vector including the program name (``sys.argv``).

    Returns:
        Process exit code (always 0 on normal completion).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--seeds", default="1,2,3,4,5,6,7,8,9,10")
    ap.add_argument("--turns", type=int, default=110)
    ap.add_argument("--map-type", default="pangaea")
    ap.add_argument("--map-size", default="duel")
    ap.add_argument("--out", default="")
    args = ap.parse_args(argv[1:])
    seeds = [int(s) for s in args.seeds.split(",") if s.strip()]
    era_map = build_tech_era_map()
    rows = []
    for seed in seeds:
        print(f"[seed {seed}] running clean duel...", file=sys.stderr)
        r = run_seed(seed, args.turns, args.map_type, args.map_size, era_map)
        rows.append(r)
        print(f"[seed {seed}] elim={r['elim_turn']} alive@100={r['p1_alive_at_100']} "
              f"snap100={r['snap100']}", file=sys.stderr)
    # The scorer defined in this module is score_convergence; the previous
    # call to the undefined name score_d1_literal raised NameError only after
    # every seed had already been run.
    score_convergence(rows)
    if args.out:
        out_path = Path(args.out)
        # Create e.g. `runs/` on demand so a long batch is not lost to a
        # missing output directory.
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_path.write_text(json.dumps(rows, indent=2), encoding="utf-8")
        print(f"\nwrote {args.out}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main(sys.argv))