feat(mc-player-api): ✨ Add routing logic for learned controller slots in dispatch system

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-06-03 04:06:43 -07:00 · 2026-06-03 04:06:43 -07:00 · 11c9b71a02
commit 11c9b71a02
parent 20e7788da5
2 changed files with 67 additions and 0 deletions
--- a/src/simulator/crates/mc-player-api/src/dispatch.rs
+++ b/src/simulator/crates/mc-player-api/src/dispatch.rs
@ -981,6 +981,12 @@ fn drive_ai_slot(state: &mut GameState, ai_slot: u8) -> u32 {
    // `drive_controller_turn` so legacy fixtures without
    // `controller_id` set keep working unchanged.
    let controller_id = state.players[pi].controller_id.clone();
+    // p1-29f — `learned:*` slots run the trained ONNX policy via a re-observing
+    // view-loop (mirroring the training harness `view`/`act` loop), NOT the
+    // one-shot tactical `decide_turn`. Route them to `drive_learned_slot`.
+    if crate::learned::is_learned_controller(&controller_id) {
+        return drive_learned_slot(state, ai_slot);
+    }
    let actions = crate::controllers::drive_controller_turn(
        &controller_id,
        &tactical,
@ -1003,6 +1009,62 @@ fn drive_ai_slot(state: &mut GameState, ai_slot: u8) -> u32 {
    applied
 }

+/// Hard cap on actions applied for one learned slot's turn: it bounds the
+/// `drive_learned_slot` loop, which applies at most one action per pass. The
+/// policy was trained with a per-episode step budget; a runaway loop (e.g. a
+/// no-op-equivalent legal action that never advances the turn) must terminate.
+/// 256 is ample for duel scale (max 16 units * a few orders + per-city builds).
+const LEARNED_MAX_ACTIONS_PER_TURN: u32 = 256;
+
+/// Drive one turn for a `learned:*` slot by running the trained ONNX policy
+/// through the same `project_view -> decide -> apply_action -> re-project`
+/// loop the policy trained against (p1-29f). Actions go through
+/// [`apply_action`] — the path training used for the policy's OWN actions (the
+/// harness `apply_action_json`) — NOT `apply_ai_action`, so re-projected
+/// observations match training. Returns the number of actions applied, which
+/// is 0 if the slot is out of range or no policy is loaded. The terminal
+/// `end_turn` is never applied or counted here: the loop just stops and
+/// `apply_end_turn` issues it, which avoids recursing the AI driver.
+fn drive_learned_slot(state: &mut GameState, ai_slot: u8) -> u32 {
+    let pi = ai_slot as usize;
+    if pi >= state.players.len() { // slot index has no matching player entry
+        return 0;
+    }
+    let net = match crate::learned::shared_learned_policy() {
+        Some(net) => net,
+        None => return 0, // policy artifact unavailable — slot passes its turn with 0 actions.
+    };
+    let mut applied: u32 = 0;
+    for _ in 0..LEARNED_MAX_ACTIONS_PER_TURN { // bounded, so the loop always ends
+        // Fog-aware projection (matches `drive_ai_slot` and the training harness
+        // default, `CP_OMNISCIENT=0`), recomputed on every pass of the loop.
+        let vision_state =
+            mc_vision::compute_vision(state, &mc_vision::VisionCatalog::default(), None);
+        let pv = match vision_state.for_player(ai_slot) {
+            Some(pv) => pv,
+            None => break, // no vision for this slot — nothing to decide.
+        };
+        let view = crate::projection::project_view_with_vision(state, ai_slot, false, pv);
+        let decision = match crate::learned::decide_action(net, &view) {
+            Ok(d) => d,
+            Err(_) => break, // decision failed; the error is discarded and the turn stops.
+        };
+        match decision.action {
+            // `EndTurn` / `Noop`: the policy is done (or has no legal action). Stop
+            // without applying it; `apply_end_turn` advances the slot rotation for us.
+            crate::action::PlayerAction::EndTurn
+            | crate::action::PlayerAction::Noop => break,
+            action => match apply_action(state, ai_slot, &action) {
+                Ok(_) => applied += 1,
+                // A rejected action with no state change would loop forever —
+                // stop the turn rather than spin. Rejected actions are not counted.
+                Err(_) => break,
+            },
+        }
+    }
+    applied
+}
+
 /// Derive a deterministic per-turn rng seed for `ai_slot`.
 ///
 /// Pure function of `(turn, slot)` — no read of mutable per-turn state.
--- a/src/simulator/crates/mc-player-api/src/lib.rs
+++ b/src/simulator/crates/mc-player-api/src/lib.rs
@ -21,6 +21,7 @@ pub mod comms_dispatch;
 pub mod controllers;
 pub mod dispatch;
 pub mod error;
+pub mod learned;
 pub mod projection;
 pub mod view;
 pub mod wire;
@ -30,6 +31,10 @@ pub use controllers::{
    ScriptedController, DEFAULT_CONTROLLER_ID,
 };
 pub use dispatch::{apply_action, apply_ai_action, suggest_actions};
+pub use learned::{
+    decide_action, is_learned_controller, register_learned_controllers, Decision,
+    LearnedController, LEARNED_CONTROLLER_ID,
+};
 pub use projection::{
    project_tactical, project_tactical_with_vision, project_view, project_view_with_vision,
 };