2026-06-03 03:32:40 -07:00
|
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
"""p1-clean-baseline.py — D1-literal convergence on a CLEAN (un-juiced) surface.
|
|
|
|
|
|
|
|
|
|
|
|
⚠️ LIMITATION (verified 2026-06-03): the player-api harness this driver uses
does NOT load a TechWeb (mc-player-api/projection.rs: "available techs from the
TechWeb once [surfaced]" — TRACKED, not done). Consequently `research.researched`
is ALWAYS empty and `tier_peak` is ALWAYS 0 on this surface, so the D1 stall
check (tp<=1) is trivially satisfied and MEANINGLESS here. The symmetric
duel-driving logic below is correct and reusable, but a faithful tier_peak
baseline requires either (a) an AUTO_PLAY_ALL_AI mode in the full Godot autoplay
scene (de-juice slot 0; that scene loads tech via TurnManager/GdTechWeb), or
(b) wiring a TechWeb into the harness dispatch + surfacing researched techs in
the projection. Do NOT cite this tool's tier_peak output as a gate result until
one of those lands. Elimination-turn / city-count output IS valid here.

p1-29d gate D1 (operator-ratified literal reading): the trailing AI (slot 1)
must be "eliminated <=T100 OR stalled (alive AND tier_peak<=1)" in 10/10 seeds.

The apricot autoplay batch surface is INVALID for this gate (operator Q2): its
slot 0 is a juiced harness player (`auto_play.gd` rush-buy / attack-commit /
formation helpers "that one clan wins every game"). This driver removes that
confound by running the SAME 2-player duel matchup the gate measures
(meta.json of batch 20260529_185955: players=2, map_size=duel, map_type=pangaea,
victory=domination) but driving BOTH slots through the identical scripted
`suggest()` chain via the player-api harness — i.e. gate-surface-MINUS-the-juice.
Both slots are `scripted:default`; the matchup is symmetric.

tier_peak = max tech-era over slot 1's researched techs (same definition as
turn_processor.gd::_player_tier_peak / auto_play.gd), mapped via the tech JSON.

D1-literal needs only: P1 elimination turn (first turn cities==0 with no
founder) and P1 tier_peak sampled at the last turn <=100. Kills/mil are not
part of the literal gate (the non-factor lens was operator-rejected); recorded
for context only.

Usage:
  tools/p1-clean-baseline.py --seeds 1,2,...,10 --turns 110 \
      [--map-type pangaea] [--map-size duel] [--out runs/clean.json]

stdlib + the rl_self_play harness only (no SB3 / numpy model load).
|
|
|
|
|
|
"""
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
|
|
import argparse
|
|
|
|
|
|
import glob
|
|
|
|
|
|
import json
|
|
|
|
|
|
import sys
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
from typing import Any
|
|
|
|
|
|
|
|
|
|
|
|
# Directory containing this script, and the repository root one level above it.
THIS_DIR = Path(__file__).resolve().parent
REPO_ROOT = THIS_DIR.parent
# Put the repo root first on sys.path so the `tooling.*` imports that follow
# resolve when this file is executed directly as a script.
sys.path.insert(0, str(REPO_ROOT))
|
|
|
|
|
|
|
|
|
|
|
|
from tooling.rl_self_play.harness_client import ( # noqa: E402
|
|
|
|
|
|
HarnessClient,
|
|
|
|
|
|
HarnessConfig,
|
|
|
|
|
|
)
|
|
|
|
|
|
from tooling.rl_self_play.encoders import encode_legal_actions # noqa: E402
|
|
|
|
|
|
from tooling.rl_self_play.record_expert import _DropStats, _resolve # noqa: E402
|
|
|
|
|
|
|
|
|
|
|
|
# Turn cutoff of the D1 gate ("eliminated <=T100"); snapshots are taken at
# turns <= T100 and the per-seed loop stops once the turn counter exceeds it.
T100 = 100
# Root of the game data tree; the tech JSON files are read from DATA / "techs".
DATA = REPO_ROOT / "public" / "games" / "age-of-dwarves" / "data"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def build_tech_era_map(data_dir: "Path | None" = None) -> dict[str, int]:
    """Map every tech id to its integer era, read from the tech JSON files.

    Scans ``<data_dir>/techs/*.json`` and skips any file whose path ends with
    ``manifest.json``. A file may contain a single tech object or a list of
    them; only objects carrying both ``"id"`` and ``"era"`` contribute.

    The scan is best-effort: unreadable or malformed files, and entries whose
    ``era`` cannot be converted to ``int``, are skipped instead of aborting.

    Args:
        data_dir: Game-data root to read from. Defaults to the module-level
            ``DATA`` directory when omitted.

    Returns:
        ``{tech_id: era}``; empty when no usable tech file is found.
    """
    root = DATA if data_dir is None else Path(data_dir)
    era: dict[str, int] = {}
    # Sorted so that, if two files define the same id, the winning entry is
    # deterministic rather than dependent on directory enumeration order.
    for p in sorted(glob.glob(str(root / "techs" / "*.json"))):
        if p.endswith("manifest.json"):
            continue
        try:
            # Context manager closes the handle promptly; explicit encoding
            # avoids depending on the platform's locale default.
            with open(p, encoding="utf-8") as fh:
                d = json.load(fh)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            continue
        for t in d if isinstance(d, list) else [d]:
            if not (isinstance(t, dict) and "id" in t and "era" in t):
                continue
            try:
                era[str(t["id"])] = int(t["era"])
            except (TypeError, ValueError):
                # Non-integer era (e.g. null): skip the entry, keep the rest.
                continue
    return era
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _owned(view: dict[str, Any], key: str, slot: int) -> list[dict[str, Any]]:
|
|
|
|
|
|
return [x for x in view.get(key, []) if int(x.get("owner", -1)) == slot]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _is_founder(u: dict[str, Any]) -> bool:
|
|
|
|
|
|
return "founder" in str(u.get("type", "")) or bool(u.get("can_found_city"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _tier_peak(view: dict[str, Any], era_map: dict[str, int]) -> int:
|
|
|
|
|
|
researched = view.get("research", {}).get("researched", []) or []
|
|
|
|
|
|
peak = 0
|
|
|
|
|
|
for tid in researched:
|
|
|
|
|
|
e = era_map.get(str(tid), 0)
|
|
|
|
|
|
if e > peak:
|
|
|
|
|
|
peak = e
|
|
|
|
|
|
return peak
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _advance_slot(client: HarnessClient, slot: int) -> None:
    """Play one slot's scripted turn: apply its suggest() chain, then end the turn.

    Behaviour mirrors mine_divergence._advance_slot (the proven scripted path).
    Every suggested action is resolved against a freshly fetched view, since
    earlier actions in the chain may have changed what is legal. The whole
    step is best-effort: a failing dispatch abandons the rest of the chain,
    and a failing final end_turn is ignored.
    """
    for suggestion in client.suggest(slot=slot):
        snapshot = client.view(slot=slot)
        _, index_to_action = encode_legal_actions(snapshot)
        chosen = _resolve(suggestion, snapshot, index_to_action, _DropStats())
        if chosen is None:
            # Suggestion could not be matched to a legal action; skip it.
            continue
        action = index_to_action[chosen]
        try:
            if action.get("type") != "end_turn":
                client.act(action, slot=slot)
            else:
                client.end_turn(slot=slot)
        except Exception:  # noqa: BLE001 best-effort advance
            break
    # Always attempt to close the turn, whatever happened in the chain above.
    try:
        client.end_turn(slot=slot)
    except Exception:  # noqa: BLE001
        pass
    client.drain_notifications()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def run_seed(seed: int, turns: int, map_type: str, map_size: str,
             era_map: dict[str, int]) -> dict[str, Any]:
    """Play one symmetric 2-player domination duel and summarise slot 1.

    Each loop iteration inspects slot 1's view, then advances slot 0 and
    slot 1 through the scripted chain. The loop stops when slot 1 has neither
    a city nor a founder (elimination), when the turn counter exceeds
    ``T100``, or after ``turns`` iterations. The harness client is always
    shut down, even on error.

    Args:
        seed: Map/game seed handed to the harness.
        turns: Maximum number of loop iterations.
        map_type: Harness map type.
        map_size: Harness map size.
        era_map: Tech id -> era mapping used for the tier_peak sample.

    Returns:
        A dict with keys ``seed``, ``last_turn``, ``elim_turn`` (``None`` if
        no elimination was observed), ``p1_alive_at_100`` (True exactly when
        ``elim_turn`` is ``None``), ``snap100`` (the latest snapshot taken at
        a turn <= ``T100``, or ``None``) and ``p1_cities_max``.
    """
    client = HarnessClient(HarnessConfig(
        seed=seed,
        players=2,
        player_slots=(0, 1),
        map_size=map_size,
        map_type=map_type,
        victory_mode="domination",
        timeout_sec=120,
    ))
    latest_snapshot: dict[str, Any] | None = None
    eliminated_on: int | None = None
    turn_seen = 0
    most_cities = 0
    try:
        for _ in range(turns):
            p1_view = client.view(slot=1)
            turn_seen = int(p1_view.get("turn", 0))
            p1_cities = _owned(p1_view, "cities", 1)
            p1_units = _owned(p1_view, "units", 1)
            p1_founders = [unit for unit in p1_units if _is_founder(unit)]
            most_cities = max(most_cities, len(p1_cities))

            # Elimination: no cities and no founder to re-found.
            if not (p1_cities or p1_founders):
                eliminated_on = turn_seen
                break

            # Past T100 and still alive → literal gate fully determined.
            if turn_seen > T100:
                break

            latest_snapshot = {
                "turn": turn_seen,
                "cities": len(p1_cities),
                # Every unit that is not a founder.
                "mil": len(p1_units) - len(p1_founders),
                "tp": _tier_peak(p1_view, era_map),
                # NOTE(review): stored as "pop" but read from score.city_count.
                "pop": int(p1_view.get("score", {}).get("city_count", 0)),
            }

            _advance_slot(client, 0)
            _advance_slot(client, 1)
    finally:
        client.shutdown()

    return {
        "seed": seed,
        "last_turn": turn_seen,
        "elim_turn": eliminated_on,
        "p1_alive_at_100": eliminated_on is None,
        "snap100": latest_snapshot,
        "p1_cities_max": most_cities,
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def score_convergence(rows: list[dict[str, Any]]) -> None:
    """Print the tier_peak-free D1 convergence verdict for a batch of seeds.

    This surface cannot measure tier_peak (the harness has no TechWeb), but
    D1 still has decisive discriminating power without it:

      CONVERGED     — P1 eliminated <= T100.
      NON-CONVERGED — P1 alive at T100 with >=2 cities (unambiguously a
                      developing co-equal peer; neither dead nor stalled,
                      regardless of era).
      AMBIGUOUS     — P1 alive at T100 with exactly 1 city. Could be a true
                      stall (tp<=1) or a held-down developer; needs the
                      tier_peak surface (AUTO_PLAY_ALL_AI) to resolve.

    Any remaining row is also counted NON-CONVERGED, with a label naming the
    actual cause: eliminated after T100, no snapshot at a turn <= T100, or
    not eliminated while holding no city in the snapshot.

    The headline question this answers: does convergence happen AT ALL on a
    clean (un-juiced) surface, and how many seeds need the heavier tier_peak
    build to adjudicate.

    Args:
        rows: Per-seed result dicts with the keys ``seed``, ``last_turn``,
            ``elim_turn``, ``p1_alive_at_100``, ``snap100`` and
            ``p1_cities_max``.

    Returns:
        None; the report is written to stdout.
    """
    row_fmt = "{:>4} {:>5} {:>5} {:>9} {:>4} {:>5} {:>7} {:>5}"
    print("\n=== p1-29d D1 convergence on CLEAN symmetric duel (tier_peak-free) ===")
    print(f"{len(rows)} seeds (both slots scripted:default via suggest; no juice)")
    print("NOTE: tier_peak unavailable on this surface (harness has no TechWeb)\n")
    print(row_fmt.format(
        "seed", "lastT", "elimT", "alive@100", "@T", "c@100", "mil@100", "cmax"))

    conv = noncv = ambig = 0
    verdicts = []
    for r in sorted(rows, key=lambda x: x["seed"]):
        s = r["snap100"] or {}
        elim = r["elim_turn"]
        c100 = s.get("cities", 0)
        alive100 = r["p1_alive_at_100"] and c100 >= 1
        if elim is not None and elim <= T100:
            verdict, label = "CONVERGED", "elim<=T100"
            conv += 1
        elif alive100 and c100 >= 2:
            verdict, label = "NON-CONVERGED", f"alive@100, {c100} cities (peer)"
            noncv += 1
        elif alive100 and c100 == 1:
            verdict, label = "AMBIGUOUS", "alive@100, 1 city (needs tier_peak)"
            ambig += 1
        else:
            # Same verdict for every leftover row, but name the real cause
            # instead of always printing "elim>T100(@None)".
            verdict = "NON-CONVERGED"
            if elim is not None:
                label = f"elim>T100(@{elim})"
            elif not r["snap100"]:
                label = "no snapshot at <=T100"
            else:
                label = f"not eliminated, {c100} cities@100"
            noncv += 1
        verdicts.append((r["seed"], verdict, label))
        print(row_fmt.format(
            r["seed"], str(r["last_turn"]),
            str(elim) if elim is not None else "-",
            "yes" if r["p1_alive_at_100"] else "no",
            str(s.get("turn", "-")), c100, s.get("mil", "-"),
            r["p1_cities_max"]))

    n = len(rows)
    print("\n--- clean convergence shape ---")
    print(f"  CONVERGED     (elim<=T100):    {conv}/{n}")
    print(f"  NON-CONVERGED (peer/late):     {noncv}/{n}")
    print(f"  AMBIGUOUS     (1 city, need tp): {ambig}/{n}")
    for seed, v, label in verdicts:
        print(f"    s{seed}: {v:<13} ({label})")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main(argv: list[str]) -> int:
    """Run the clean duel for each requested seed and print the D1 report.

    Per-seed progress goes to stderr; the convergence table goes to stdout.
    When ``--out`` is given, the raw per-seed rows are also written there as
    JSON.

    Args:
        argv: Full argument vector including the program name at index 0
            (i.e. ``sys.argv``).

    Returns:
        Process exit status; 0 on completion (failures propagate as
        exceptions).
    """
    ap = argparse.ArgumentParser(
        description="D1-literal convergence on a clean symmetric duel.")
    ap.add_argument("--seeds", default="1,2,3,4,5,6,7,8,9,10",
                    help="comma-separated integer seeds")
    ap.add_argument("--turns", type=int, default=110,
                    help="maximum loop iterations per seed")
    ap.add_argument("--map-type", default="pangaea")
    ap.add_argument("--map-size", default="duel")
    ap.add_argument("--out", default="",
                    help="optional path for the per-seed JSON rows")
    args = ap.parse_args(argv[1:])

    seeds = [int(s) for s in args.seeds.split(",") if s.strip()]
    era_map = build_tech_era_map()
    rows = []
    for seed in seeds:
        print(f"[seed {seed}] running clean duel...", file=sys.stderr)
        r = run_seed(seed, args.turns, args.map_type, args.map_size, era_map)
        rows.append(r)
        print(f"[seed {seed}] elim={r['elim_turn']} alive@100={r['p1_alive_at_100']} "
              f"snap100={r['snap100']}", file=sys.stderr)

    score_convergence(rows)

    if args.out:
        out_path = Path(args.out)
        # The documented usage writes to runs/clean.json; create the parent
        # directory so a finished batch is not lost at the final write.
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_path.write_text(json.dumps(rows, indent=2), encoding="utf-8")
        print(f"\nwrote {args.out}")
    return 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: hand the full argv to main() and exit with its status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))
|