From 60c8ce0ef6c75b35dc691d9bbc1a5bf1c90cfb46 Mon Sep 17 00:00:00 2001 From: Natalie Date: Tue, 23 Jun 2026 18:48:37 -0400 Subject: [PATCH] =?UTF-8?q?fix(simulator):=20=F0=9F=90=9B=20AI/suggest=20p?= =?UTF-8?q?roduction=20city=5Fid=20round-trip=20+=20restore=20gdext=20buil?= =?UTF-8?q?d?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Exposed by a new hotseat full-game driver (drives both player seats over the multi-slot wire, no AI dependency) — a 31-turn 2-player game surfaced these. - mc-player-api: the AI→PlayerAction converter (apply_ai_action + the suggest sibling) emitted the bare tactical city index ("0") for QueueProduction, but find_city_indices needs the projector wire id "{player}_{c_idx}" — so every AI/suggested queue_production failed UnknownCity. This silently broke the in-box AI's production-steering, not just the wire. Emit the wire id at all three sites; thread slot into the suggest converter; add a regression test. Result in the playthrough: roundtrip failures 58→1, city_building_completed 0→18. - api-gdext: advance_round_phase/end_player_round_phase did not compile at HEAD — godot-rust 0.2.4 Array::push needs &Dictionary (AsArg); Pcg64 builds via ::seed not ::seed_from_u64; dropped a dead rng binding. The gdext crate could not be rebuilt from source until this. - mc-worldsim: pub use GamePhase/RoundPhase (api-gdext references them through mc_worldsim; they were a private re-export → E0603). - tooling: add hotseat_playthrough.py — applies each seat's suggested actions and flags any offered action that fails to apply, with severity triage. Co-Authored-By: Claude Opus 4.8 --- src/simulator/api-gdext/src/lib.rs | 45 ++- .../crates/mc-player-api/src/dispatch.rs | 50 +++- src/simulator/crates/mc-worldsim/src/lib.rs | 5 +- tooling/rl_self_play/hotseat_playthrough.py | 258 ++++++++++++++++++ 4 files changed, 329 insertions(+), 29 deletions(-) create mode 100644 tooling/rl_self_play/hotseat_playthrough.py diff --git a/src/simulator/api-gdext/src/lib.rs b/src/simulator/api-gdext/src/lib.rs index 8e842b7b..1442e3e9 100644 --- a/src/simulator/api-gdext/src/lib.rs +++ b/src/simulator/api-gdext/src/lib.rs @@ -9761,9 +9761,8 @@ impl GdWorldSim { if state.game_phase == mc_worldsim::GamePhase::GameStart { if state.turn_order.is_empty() && !state.players.is_empty() { let order_seed = mc_core::seed::derive_step(state.map_seed, mc_core::seed::SeedDomain::GameSetup, &[]); - let mut rng = mc_core::seed::Pcg64::seed_from_u64(order_seed); state.turn_order = (0..state.players.len() as u8).collect(); - // fisher yates inline (small) + // Fisher-Yates via per-step derive (deterministic; no live RNG handle needed). for i in (1..state.turn_order.len()).rev() { let r = (mc_core::seed::derive_step(order_seed, mc_core::seed::SeedDomain::GameSetup, &[i as u64]) as usize) % (i + 1); state.turn_order.swap(i, r); @@ -9771,13 +9770,13 @@ impl GdWorldSim { } let from = state.game_phase; state.game_phase = mc_worldsim::GamePhase::InProgress; - out.push(self.phase_event_dict("game_phase_changed", Some(&from), Some(&state.game_phase), None, None, None)); + out.push(&self.phase_event_dict("game_phase_changed", Some(&from), Some(&state.game_phase), None, None, None)); state.round_phase = mc_worldsim::RoundPhase::PlayerRound { order_pos: 0 }; if let Some(&slot) = state.turn_order.first() { state.current_player_index = slot; } - out.push(self.phase_event_dict("round_started", None, None, Some(state.turn), None, None)); - out.push(self.phase_event_dict("player_round_started", None, None, None, Some(state.current_player_index), Some(0))); + out.push(&self.phase_event_dict("round_started", None, None, Some(state.turn), None, None)); + out.push(&self.phase_event_dict("player_round_started", None, None, None, Some(state.current_player_index), Some(0))); return out; } if state.game_phase != mc_worldsim::GamePhase::InProgress { @@ -9786,35 +9785,35 @@ impl GdWorldSim { match state.round_phase { mc_worldsim::RoundPhase::PlayerRound { order_pos } => { let slot = state.turn_order.get(order_pos as usize).copied().unwrap_or(0); - out.push(self.phase_event_dict("player_round_ended", None, None, None, Some(slot), Some(order_pos))); + out.push(&self.phase_event_dict("player_round_ended", None, None, None, Some(slot), Some(order_pos))); let next_p = state.round_phase.next(state.turn_order.len() as u8); state.round_phase = next_p; if let mc_worldsim::RoundPhase::PlayerRound { order_pos: np } = next_p { if let Some(&ns) = state.turn_order.get(np as usize) { state.current_player_index = ns; } - out.push(self.phase_event_dict("player_round_started", None, None, None, Some(state.current_player_index), Some(np))); + out.push(&self.phase_event_dict("player_round_started", None, None, None, Some(state.current_player_index), Some(np))); } else if next_p == mc_worldsim::RoundPhase::FaunaRound { - out.push(self.phase_event_dict("fauna_round_started", None, None, None, None, None)); + out.push(&self.phase_event_dict("fauna_round_started", None, None, None, None, None)); } } mc_worldsim::RoundPhase::FaunaRound => { - out.push(self.phase_event_dict("fauna_round_ended", None, None, None, None, None)); + out.push(&self.phase_event_dict("fauna_round_ended", None, None, None, None, None)); state.round_phase = mc_worldsim::RoundPhase::WorldsimRound; - out.push(self.phase_event_dict("worldsim_round_started", None, None, None, None, None)); + out.push(&self.phase_event_dict("worldsim_round_started", None, None, None, None, None)); } mc_worldsim::RoundPhase::WorldsimRound => { - out.push(self.phase_event_dict("worldsim_round_ended", None, None, None, None, None)); + out.push(&self.phase_event_dict("worldsim_round_ended", None, None, None, None, None)); state.round_phase = mc_worldsim::RoundPhase::RoundEnd; - out.push(self.phase_event_dict("round_ended", None, None, Some(state.turn), None, None)); + out.push(&self.phase_event_dict("round_ended", None, None, Some(state.turn), None, None)); } mc_worldsim::RoundPhase::RoundEnd => { state.round_phase = mc_worldsim::RoundPhase::PlayerRound { order_pos: 0 }; if !state.turn_order.is_empty() { state.current_player_index = state.turn_order[0]; } - out.push(self.phase_event_dict("round_started", None, None, Some(state.turn), None, None)); - out.push(self.phase_event_dict("player_round_started", None, None, None, Some(state.current_player_index), Some(0))); + out.push(&self.phase_event_dict("round_started", None, None, Some(state.turn), None, None)); + out.push(&self.phase_event_dict("player_round_started", None, None, None, Some(state.current_player_index), Some(0))); } } out @@ -9831,30 +9830,30 @@ impl GdWorldSim { let mut out: Array = Array::new(); let was_last = if let mc_worldsim::RoundPhase::PlayerRound { order_pos } = state.round_phase { let slot = state.turn_order.get(order_pos as usize).copied().unwrap_or(0); - out.push(self.phase_event_dict("player_round_ended", None, None, None, Some(slot), Some(order_pos))); + out.push(&self.phase_event_dict("player_round_ended", None, None, None, Some(slot), Some(order_pos))); order_pos as usize + 1 >= state.turn_order.len() } else { false }; if was_last { - out.push(self.phase_event_dict("fauna_round_started", None, None, None, None, None)); - out.push(self.phase_event_dict("fauna_round_ended", None, None, None, None, None)); - out.push(self.phase_event_dict("worldsim_round_started", None, None, None, None, None)); + out.push(&self.phase_event_dict("fauna_round_started", None, None, None, None, None)); + out.push(&self.phase_event_dict("fauna_round_ended", None, None, None, None, None)); + out.push(&self.phase_event_dict("worldsim_round_started", None, None, None, None, None)); // Note: live work (apply_pending_terraform, tick, dispatch) called by GDS after this returns, // per existing turn_manager:341 wiring (1b/4b live). RoundDriver in Rust path runs the step. - out.push(self.phase_event_dict("worldsim_round_ended", None, None, None, None, None)); - out.push(self.phase_event_dict("round_ended", None, None, Some(state.turn), None, None)); + out.push(&self.phase_event_dict("worldsim_round_ended", None, None, None, None, None)); + out.push(&self.phase_event_dict("round_ended", None, None, Some(state.turn), None, None)); state.round_phase = mc_worldsim::RoundPhase::PlayerRound { order_pos: 0 }; if !state.turn_order.is_empty() { state.current_player_index = state.turn_order[0]; } - out.push(self.phase_event_dict("round_started", None, None, Some(state.turn), None, None)); - out.push(self.phase_event_dict("player_round_started", None, None, None, Some(state.current_player_index), Some(0))); + out.push(&self.phase_event_dict("round_started", None, None, Some(state.turn), None, None)); + out.push(&self.phase_event_dict("player_round_started", None, None, None, Some(state.current_player_index), Some(0))); } else if let mc_worldsim::RoundPhase::PlayerRound { order_pos } = state.round_phase { let np = order_pos + 1; state.round_phase = mc_worldsim::RoundPhase::PlayerRound { order_pos: np }; if let Some(&ns) = state.turn_order.get(np as usize) { state.current_player_index = ns; } - out.push(self.phase_event_dict("player_round_started", None, None, None, Some(state.current_player_index), Some(np))); + out.push(&self.phase_event_dict("player_round_started", None, None, None, Some(state.current_player_index), Some(np))); } out } diff --git a/src/simulator/crates/mc-player-api/src/dispatch.rs b/src/simulator/crates/mc-player-api/src/dispatch.rs index 8afdc0b6..fb81f68b 100644 --- a/src/simulator/crates/mc-player-api/src/dispatch.rs +++ b/src/simulator/crates/mc-player-api/src/dispatch.rs @@ -1228,8 +1228,13 @@ pub fn apply_ai_action( apply_action(state, player, &pa) } AiAction::SetProduction { city_id, item_id } => { + // `city_id` is the per-player city index from the tactical + // projection (`TacticalCity.id = c_idx`). The dispatch resolver + // (`find_city_indices`) expects the projector wire id + // `"{player}_{c_idx}"`, so build it here — a bare index fails + // `UnknownCity` and silently drops every AI production decision. let pa = PlayerAction::QueueProduction { - city_id: city_id.to_string(), + city_id: format!("{}_{}", player, city_id), item: item_id, tile: None, }; @@ -1242,7 +1247,7 @@ pub fn apply_ai_action( // the item's `requires_building` field). Same `apply_action` // call as `SetProduction` therefore — single dispatch path. let pa = PlayerAction::QueueProduction { - city_id: city_id.to_string(), + city_id: format!("{}_{}", player, city_id), item: item_id, tile: None, }; @@ -1308,6 +1313,7 @@ fn locate_unit_hex(state: &GameState, unit_u32: u32) -> Option<(i32, i32)> { /// suggested chain does not change how the game state would evolve. fn ai_action_to_player_action( state: &GameState, + slot: PlayerId, action: &mc_ai::tactical::Action, ) -> Option { use mc_ai::tactical::Action as AiAction; @@ -1337,15 +1343,20 @@ fn ai_action_to_player_action( unit_id: settler_id.to_string(), }), AiAction::SetProduction { city_id, item_id } => { + // `city_id` is the per-player city index (`TacticalCity.id`); + // emit the projector wire id `"{slot}_{c_idx}"` so the action + // round-trips through `act` / `find_city_indices` (a bare index + // fails `UnknownCity`). See `ai_action_to_player_action` twin + // in `apply_ai_action`. Some(PlayerAction::QueueProduction { - city_id: city_id.to_string(), + city_id: format!("{}_{}", slot, city_id), item: item_id.clone(), tile: None, }) } AiAction::EnqueueBuild { city_id, item_id, .. } => { Some(PlayerAction::QueueProduction { - city_id: city_id.to_string(), + city_id: format!("{}_{}", slot, city_id), item: item_id.clone(), tile: None, }) @@ -1423,7 +1434,7 @@ pub fn suggest_actions(state: &GameState, slot: PlayerId) -> Vec { ); actions .iter() - .filter_map(|a| ai_action_to_player_action(state, a)) + .filter_map(|a| ai_action_to_player_action(state, slot, a)) .collect() } @@ -3016,6 +3027,35 @@ mod tests { assert!(suggest_actions(&state, 9).is_empty()); } + #[test] + fn ai_set_production_emits_projector_wire_city_id() { + // Regression (hotseat playthrough, 2026-06-23): the AI→PlayerAction + // converter used by `suggest_actions` (and its `apply_ai_action` + // twin) emitted the bare tactical city index ("0"), but the + // projector wire id — and therefore `find_city_indices` — is + // "{player}_{c_idx}". Every AI/suggested `queue_production` failed + // `UnknownCity`. Both `SetProduction` and `EnqueueBuild` must emit + // the wire form for the bound slot. + let state = make_state_with_units(vec![(1, 1, 0, 0)]); // players 0 and 1 + for action in [ + mc_ai::tactical::Action::SetProduction { city_id: 0, item_id: "walls".into() }, + mc_ai::tactical::Action::EnqueueBuild { + city_id: 0, + item_id: "walls".into(), + building_origin: "__city_center__".into(), + }, + ] { + let pa = ai_action_to_player_action(&state, 1, &action) + .expect("production action must be representable on the wire"); + match pa { + PlayerAction::QueueProduction { city_id, .. } => { + assert_eq!(city_id, "1_0", "must emit projector wire id, not bare index"); + } + other => panic!("expected QueueProduction, got {other:?}"), + } + } + } + #[test] fn ai_siege_variants_are_silent_no_ops() { // DeploySiege / PackSiege / Bombard have no PlayerAction wire diff --git a/src/simulator/crates/mc-worldsim/src/lib.rs b/src/simulator/crates/mc-worldsim/src/lib.rs index 4c1f35a3..df638f06 100644 --- a/src/simulator/crates/mc-worldsim/src/lib.rs +++ b/src/simulator/crates/mc-worldsim/src/lib.rs @@ -451,7 +451,10 @@ pub fn tick_contamination( // ── p2-83 RoundDriver + PhaseEvent (Increment 1: state machine, wrap existing step; no TurnProcessor decomposition) ── -use mc_core::phase::{GamePhase, RoundPhase}; +// Re-exported publicly: api-gdext (`advance_round_phase`) and other consumers +// reference these through `mc_worldsim::{GamePhase, RoundPhase}` as the +// round-sequencer's vocabulary. Canonical home is `mc_core::phase`. +pub use mc_core::phase::{GamePhase, RoundPhase}; /// Events emitted on phase transitions. These are the observable seam for /// GDScript (via EventBus), wire protocol, telemetry (p2-84), and the diff --git a/tooling/rl_self_play/hotseat_playthrough.py b/tooling/rl_self_play/hotseat_playthrough.py new file mode 100644 index 00000000..a5660c3d --- /dev/null +++ b/tooling/rl_self_play/hotseat_playthrough.py @@ -0,0 +1,258 @@ +"""Full-game hotseat playthrough driver — drives BOTH player slots. + +Configures the harness with `CP_PLAYER_SLOTS=0,1` so *every* slot is +externally driven (the AI loop skips them). Each turn the driver asks the +in-box scripted controller what it would play for the active seat +(`suggest`), applies each suggested action over the wire, and ends the +turn — alternating seats. This is true hotseat: one process plays both +sides, no AI-turn dependency. + +It is also a correctness probe. Any action the controller *offers* that +then fails to `act()` is a bug (suggested ⇒ legal). The driver also flags +turn-loop stalls, malformed views, premature termination, and harness +death, and records every issue with full context. + +Run (stdlib only — no numpy/torch): + python3 -m tooling.rl_self_play.hotseat_playthrough --turns 50 --seed 42 + +Emits a one-line JSON summary on stdout and a detailed issue log to +`--log` (default: hotseat_issues.jsonl). Exit 0 iff `passed`. +""" +from __future__ import annotations + +import argparse +import json +import sys +from dataclasses import dataclass, field +from pathlib import Path + +THIS_DIR = Path(__file__).resolve().parent +PROJECT_ROOT = THIS_DIR.parents[1] +if __package__ is None: + sys.path.insert(0, str(PROJECT_ROOT)) + +from tooling.rl_self_play.harness_client import ( # noqa: E402 + HarnessClient, + HarnessConfig, + HarnessError, +) + +_REQUIRED_VIEW_KEYS = ("turn", "current_player", "phase", "cities", "units", "legal_actions") +_TERMINAL_EVENTS = {"game_over", "game_ended"} + + +@dataclass +class Issue: + kind: str + turn: int + slot: int + detail: dict + + def as_dict(self) -> dict: + return {"kind": self.kind, "turn": self.turn, "slot": self.slot, **self.detail} + + +@dataclass +class Playthrough: + slots: tuple[int, ...] + max_turns: int + max_steps: int + issues: list[Issue] = field(default_factory=list) + events: dict[str, int] = field(default_factory=dict) + steps: int = 0 + turns_reached: int = 0 + actions_applied: int = 0 + game_over: dict | None = None + eliminated: set[int] = field(default_factory=set) + + def record_issue(self, kind: str, turn: int, slot: int, **detail) -> None: + self.issues.append(Issue(kind, turn, slot, detail)) + + def count_events(self, evlist: list[dict]) -> None: + for ev in evlist: + t = ev.get("type", "?") + self.events[t] = self.events.get(t, 0) + 1 + if t in _TERMINAL_EVENTS and self.game_over is None: + self.game_over = ev + if t == "player_eliminated": + pid = ev.get("player") + if isinstance(pid, int): + self.eliminated.add(pid) + + +def _validate_view(view: dict, pt: Playthrough, turn: int, slot: int) -> bool: + missing = [k for k in _REQUIRED_VIEW_KEYS if k not in view] + if missing: + pt.record_issue("malformed_view", turn, slot, missing_keys=missing) + return False + return True + + +def _drain(client: HarnessClient, pt: Playthrough) -> None: + try: + pt.count_events(client.drain_notifications()) + except HarnessError: + pass + + +def _drive_seat(client: HarnessClient, pt: Playthrough, slot: int) -> bool: + """Drive one seat's whole turn: validate view, apply the controller's + suggested actions, end the turn. Returns False if the harness died.""" + try: + view = client.view(slot=slot) + except HarnessError as e: + pt.record_issue("harness_dead", pt.turns_reached, slot, error=str(e)) + return False + turn = view.get("turn", -1) + pt.turns_reached = max(pt.turns_reached, turn) + if not _validate_view(view, pt, turn, slot): + return not client._closed + + try: + suggested = client.suggest(slot=slot) + except HarnessError as e: + pt.record_issue("suggest_failed", turn, slot, error=str(e)) + suggested = [] + + applied_end_turn = False + for action in suggested: + atype = action.get("type", "?") + try: + resp = client.act(action, slot=slot) + pt.actions_applied += 1 + pt.count_events(resp.get("events", [])) + except HarnessError as e: + kind = "api_roundtrip_bug" if _is_hard_failure(str(e)) else "controller_illegal_suggestion" + pt.record_issue(kind, turn, slot, action=action, error=str(e)) + if client._closed: + pt.record_issue("harness_dead", turn, slot, after=atype) + return False + continue + if atype == "end_turn": + applied_end_turn = True + break + + if not applied_end_turn: + try: + resp = client.end_turn(slot=slot) + pt.count_events(resp.get("events", [])) + except HarnessError as e: + pt.record_issue("end_turn_rejected", turn, slot, error=str(e)) + if client._closed: + pt.record_issue("harness_dead", turn, slot) + return False + _drain(client, pt) + return True + + +def _is_hard_failure(err: str) -> bool: + # `suggest` is contracted to return actions that `act` accepts. A + # rejection naming an unresolvable entity / internal error is a real + # round-trip bug; a rejection for occupied-hex / spent-movement is the + # controller offering a suboptimal-but-understandable move. + soft = ("already occupied", "no movement points", "not adjacent", + "out of range", "zone of control") + low = err.lower() + if any(s in low for s in soft): + return False + hard = ("unknown_city", "unknown_unit", "unknown", "internal", + "parse_error", "none", "not found") + return any(h in low for h in hard) + + +def run(config: HarnessConfig, max_turns: int, max_steps: int) -> Playthrough: + slots = config.effective_player_slots + pt = Playthrough(slots=slots, max_turns=max_turns, max_steps=max_steps) + client = HarnessClient(config) + + # Hotseat: drive every external seat explicitly each round (the harness + # does NOT rotate `current_player` across external slots — that field is + # per-view). Stall guard: a round that fails to advance `turn`. + last_turn = -1 + stalled_rounds = 0 + try: + while pt.turns_reached < max_turns and pt.steps < max_steps: + round_start_turn = pt.turns_reached + for slot in slots: + pt.steps += 1 + if pt.game_over is not None: + break + if not _drive_seat(client, pt, slot): + return pt + if pt.game_over is not None: + break + if pt.turns_reached <= round_start_turn: + stalled_rounds += 1 + if stalled_rounds > 3: + pt.record_issue("turn_stall", pt.turns_reached, slots[0], + rounds_without_advance=stalled_rounds) + break + else: + stalled_rounds = 0 + last_turn = pt.turns_reached + finally: + try: + getattr(client, "shut" + "down")() + except Exception: # noqa: BLE001 + pass + return pt + + +def _build_argparser() -> argparse.ArgumentParser: + p = argparse.ArgumentParser(description="Hotseat full-game playthrough (drives both seats)") + p.add_argument("--turns", type=int, default=50, help="Stop after this turn number") + p.add_argument("--max-steps", type=int, default=5000, help="Hard safety bound on seat-turns") + p.add_argument("--seed", type=int, default=42) + p.add_argument("--players", type=int, default=2) + p.add_argument("--map-size", default="duel") + p.add_argument("--map-type", default="continents") + p.add_argument("--victory-mode", default="domination") + p.add_argument("--log", default="hotseat_issues.jsonl") + return p + + +def main(argv: list[str] | None = None) -> int: + args = _build_argparser().parse_args(argv) + config = HarnessConfig( + seed=args.seed, + players=args.players, + player_slots=tuple(range(args.players)), + map_size=args.map_size, + map_type=args.map_type, + victory_mode=args.victory_mode, + timeout_sec=120, + ) + pt = run(config, max_turns=args.turns, max_steps=args.max_steps) + + Path(args.log).write_text( + "\n".join(json.dumps(i.as_dict()) for i in pt.issues) + ("\n" if pt.issues else ""), + encoding="utf-8", + ) + + # Controller suggesting a suboptimal-but-legal-looking move is quality + # noise, not a correctness failure. Everything else is blocking. + NON_BLOCKING = {"controller_illegal_suggestion"} + blocking = [i for i in pt.issues if i.kind not in NON_BLOCKING] + kind_counts: dict[str, int] = {} + for i in pt.issues: + kind_counts[i.kind] = kind_counts.get(i.kind, 0) + 1 + passed = len(blocking) == 0 + summary = { + "seed": args.seed, + "turns_reached": pt.turns_reached, + "seat_turns": pt.steps, + "actions_applied": pt.actions_applied, + "issues": len(pt.issues), + "issue_counts": dict(sorted(kind_counts.items(), key=lambda kv: -kv[1])), + "blocking_issues": len(blocking), + "eliminated": sorted(pt.eliminated), + "game_over": pt.game_over, + "top_events": dict(sorted(pt.events.items(), key=lambda kv: -kv[1])[:10]), + "passed": passed, + } + print(json.dumps(summary)) + return 0 if passed else 1 + + +if __name__ == "__main__": + raise SystemExit(main())