diff --git a/src/simulator/crates/mc-player-api/src/dispatch.rs b/src/simulator/crates/mc-player-api/src/dispatch.rs index 99b72de6..d015891d 100644 --- a/src/simulator/crates/mc-player-api/src/dispatch.rs +++ b/src/simulator/crates/mc-player-api/src/dispatch.rs @@ -1054,15 +1054,37 @@ const LEARNED_MAX_ACTIONS_PER_TURN: u32 = 256; /// behalf — we stop the loop instead of applying it here to avoid recursing /// the AI driver). fn drive_learned_slot(state: &mut GameState, ai_slot: u8) -> u32 { + // p1-29k — thin wrapper over the recording variant; the player-API world + // discards the applied-action log (behaviour-identical to the pre-split + // loop). The autoplay surface uses `drive_learned_slot_recording` directly + // so its GDScript reconciler can replay the policy's chosen actions. + drive_learned_slot_recording(state, ai_slot).0 +} + +/// p1-29k — the learned-slot policy loop, returning the ordered list of +/// `PlayerAction`s it actually applied (the terminal `end_turn`/`noop` that +/// stops the loop is NOT included — it is not an applied mutation). This is +/// the single source of truth for the loop body; `drive_learned_slot` calls +/// it and discards the log, so the player-API world is unchanged. The +/// autoplay surface (`GdGameState::run_learned_slot`) consumes the log to +/// reconcile the Rust post-turn state back into its GDScript entities. +/// +/// `.0` is the count applied (== `log.len()`); `.1` is the log. Both are +/// returned so callers that only want the count avoid a `.len()`. +pub fn drive_learned_slot_recording( + state: &mut GameState, + ai_slot: u8, +) -> (u32, Vec<crate::action::PlayerAction>) { let pi = ai_slot as usize; if pi >= state.players.len() { - return 0; + return (0, Vec::new()); } let net = match crate::learned::shared_learned_policy() { Some(net) => net, - None => return 0, // artifact unavailable — slot passes its turn. 
}; let mut applied: u32 = 0; + let mut log: Vec<crate::action::PlayerAction> = Vec::new(); for _ in 0..LEARNED_MAX_ACTIONS_PER_TURN { // Fog-aware projection, matching `drive_ai_slot` and the training // harness default (`CP_OMNISCIENT=0`). @@ -1083,14 +1105,17 @@ fn drive_learned_slot(state: &mut GameState, ai_slot: u8) -> u32 { crate::action::PlayerAction::EndTurn | crate::action::PlayerAction::Noop => break, action => match apply_action(state, ai_slot, &action) { - Ok(_) => applied += 1, + Ok(_) => { + applied += 1; + log.push(action); + } // A rejected action with no state change would loop forever — // stop the turn rather than spin. Err(_) => break, }, } } - applied + (applied, log) } /// Derive a deterministic per-turn rng seed for `ai_slot`. diff --git a/src/simulator/crates/mc-player-api/src/lib.rs b/src/simulator/crates/mc-player-api/src/lib.rs index 2d43e7cb..43557d12 100644 --- a/src/simulator/crates/mc-player-api/src/lib.rs +++ b/src/simulator/crates/mc-player-api/src/lib.rs @@ -30,7 +30,7 @@ pub use controllers::{ register_controller, registered_ids, AiController, AiControllerIdent, SandboxKind, ScriptedController, DEFAULT_CONTROLLER_ID, }; -pub use dispatch::{apply_action, apply_ai_action, suggest_actions}; +pub use dispatch::{apply_action, apply_ai_action, drive_learned_slot_recording, suggest_actions}; pub use learned::{ decide_action, is_learned_controller, register_learned_controllers, Decision, LearnedController, LEARNED_CONTROLLER_ID,