feat(@projects/@magic-civilization): ✨ add recording variant for learned slot actions

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-06-08 18:05:47 -07:00 · 2026-06-08 18:05:47 -07:00 · a5de8ad517
commit a5de8ad517
parent a804ceb430
2 changed files with 30 additions and 5 deletions
--- a/src/simulator/crates/mc-player-api/src/dispatch.rs
+++ b/src/simulator/crates/mc-player-api/src/dispatch.rs
@ -1054,15 +1054,37 @@ const LEARNED_MAX_ACTIONS_PER_TURN: u32 = 256;
 /// behalf — we stop the loop instead of applying it here to avoid recursing
 /// the AI driver).
 fn drive_learned_slot(state: &mut GameState, ai_slot: u8) -> u32 {
+    // p1-29k — thin wrapper over the recording variant; the player-API world
+    // discards the applied-action log (behaviour-identical to the pre-split
+    // loop). The autoplay surface uses `drive_learned_slot_recording` directly
+    // so its GDScript reconciler can replay the policy's chosen actions.
+    drive_learned_slot_recording(state, ai_slot).0
+}
+
+/// p1-29k — the learned-slot policy loop, returning the ordered list of
+/// `PlayerAction`s it actually applied (the terminal `EndTurn`/`Noop`, or a rejected
+/// action, that stops the loop is NOT included — it is not an applied mutation). This is
+/// the single source of truth for the loop body; `drive_learned_slot` calls
+/// it and discards the log, so the player-API world is unchanged. The
+/// autoplay surface (`GdGameState::run_learned_slot`) consumes the log to
+/// reconcile the Rust post-turn state back into its GDScript entities.
+///
+/// `.0` is the count applied (== `log.len()`); `.1` is the log. Both are
+/// returned so callers that only want the count avoid a `.len()`.
+pub fn drive_learned_slot_recording(
+    state: &mut GameState,
+    ai_slot: u8,
+) -> (u32, Vec<crate::action::PlayerAction>) {
    let pi = ai_slot as usize;
    if pi >= state.players.len() {
-        return 0;
+        return (0, Vec::new());
    }
    let net = match crate::learned::shared_learned_policy() {
        Some(net) => net,
-        None => return 0, // artifact unavailable — slot passes its turn.
+        None => return (0, Vec::new()), // artifact unavailable — slot passes its turn.
    };
    let mut applied: u32 = 0;
+    let mut log: Vec<crate::action::PlayerAction> = Vec::new();
    for _ in 0..LEARNED_MAX_ACTIONS_PER_TURN {
        // Fog-aware projection, matching `drive_ai_slot` and the training
        // harness default (`CP_OMNISCIENT=0`).
@ -1083,14 +1105,17 @@ fn drive_learned_slot(state: &mut GameState, ai_slot: u8) -> u32 {
            crate::action::PlayerAction::EndTurn
            | crate::action::PlayerAction::Noop => break,
            action => match apply_action(state, ai_slot, &action) {
-                Ok(_) => applied += 1,
+                Ok(_) => {
+                    applied += 1;
+                    log.push(action);
+                }
                // A rejected action with no state change would loop forever —
                // stop the turn rather than spin.
                Err(_) => break,
            },
        }
    }
-    applied
+    (applied, log)
 }

 /// Derive a deterministic per-turn rng seed for `ai_slot`.
--- a/src/simulator/crates/mc-player-api/src/lib.rs
+++ b/src/simulator/crates/mc-player-api/src/lib.rs
@ -30,7 +30,7 @@ pub use controllers::{
    register_controller, registered_ids, AiController, AiControllerIdent, SandboxKind,
    ScriptedController, DEFAULT_CONTROLLER_ID,
 };
-pub use dispatch::{apply_action, apply_ai_action, suggest_actions};
+pub use dispatch::{apply_action, apply_ai_action, drive_learned_slot_recording, suggest_actions};
 pub use learned::{
    decide_action, is_learned_controller, register_learned_controllers, Decision,
    LearnedController, LEARNED_CONTROLLER_ID,