diff --git a/src/simulator/crates/mc-player-api/src/dispatch.rs b/src/simulator/crates/mc-player-api/src/dispatch.rs
index f7a000e6..e97590dd 100644
--- a/src/simulator/crates/mc-player-api/src/dispatch.rs
+++ b/src/simulator/crates/mc-player-api/src/dispatch.rs
@@ -981,6 +981,12 @@ fn drive_ai_slot(state: &mut GameState, ai_slot: u8) -> u32 {
     // `drive_controller_turn` so legacy fixtures without
     // `controller_id` set keep working unchanged.
     let controller_id = state.players[pi].controller_id.clone();
+    // p1-29f — `learned:*` slots run the trained ONNX policy via a re-observing
+    // view-loop (mirroring the training harness `view`/`act` loop), NOT the
+    // one-shot tactical `decide_turn`. Route them to `drive_learned_slot`.
+    if crate::learned::is_learned_controller(&controller_id) {
+        return drive_learned_slot(state, ai_slot);
+    }
     let actions = crate::controllers::drive_controller_turn(
         &controller_id,
         &tactical,
@@ -1003,6 +1009,73 @@ fn drive_ai_slot(state: &mut GameState, ai_slot: u8) -> u32 {
     applied
 }
 
+/// Hard cap on actions applied for one learned slot's turn. The policy was
+/// trained with a per-episode step budget; a runaway loop (e.g. the policy
+/// repeatedly picking a no-op-equivalent legal action that never advances the
+/// turn) must terminate. 256 covers any legitimate turn for a duel-scale game
+/// (max 16 units * a few orders + per-city builds) with wide margin.
+const LEARNED_MAX_ACTIONS_PER_TURN: u32 = 256;
+
+/// Drive one turn for a `learned:*` slot by running the trained ONNX policy
+/// through the same `project_view -> decide -> apply_action -> re-project`
+/// loop the policy trained against (p1-29f). Applies via [`apply_action`] —
+/// the path training used for the policy's OWN actions (the harness
+/// `apply_action_json`) — NOT `apply_ai_action`, so re-projected observations
+/// match training.
+///
+/// Returns the number of `PlayerAction`s applied (excluding the terminal
+/// `end_turn`, which `apply_end_turn` issues on the dispatcher's behalf — we
+/// stop the loop instead of applying it here to avoid recursing the AI
+/// driver). Returns 0 without touching `state` when `ai_slot` is out of range
+/// or `shared_learned_policy` yields no policy.
+///
+/// The loop stops early — keeping the count applied so far — when the slot has
+/// no vision, `decide_action` or `apply_action` returns an error, the policy
+/// picks `EndTurn`/`Noop`, or [`LEARNED_MAX_ACTIONS_PER_TURN`] is reached.
+fn drive_learned_slot(state: &mut GameState, ai_slot: u8) -> u32 {
+    let pi = ai_slot as usize;
+    if pi >= state.players.len() {
+        return 0;
+    }
+    let net = match crate::learned::shared_learned_policy() {
+        Some(net) => net,
+        None => return 0, // artifact unavailable — slot passes its turn.
+    };
+    // Loop-invariant: build the default catalog once per turn instead of once
+    // per applied action.
+    let vision_catalog = mc_vision::VisionCatalog::default();
+    let mut applied: u32 = 0;
+    for _ in 0..LEARNED_MAX_ACTIONS_PER_TURN {
+        // Fog-aware projection, matching `drive_ai_slot` and the training
+        // harness default (`CP_OMNISCIENT=0`). Recomputed every iteration so
+        // the policy observes the effect of its previous action.
+        let vision_state = mc_vision::compute_vision(state, &vision_catalog, None);
+        let pv = match vision_state.for_player(ai_slot) {
+            Some(pv) => pv,
+            None => break, // no vision for this slot — nothing to decide.
+        };
+        let view = crate::projection::project_view_with_vision(state, ai_slot, false, pv);
+        let decision = match crate::learned::decide_action(net, &view) {
+            Ok(d) => d,
+            // `decide_action` returned an error — keep what was applied and stop.
+            Err(_) => break,
+        };
+        match decision.action {
+            // The policy chose to end its turn (or has no legal action). Stop;
+            // `apply_end_turn` advances the slot rotation for us.
+            crate::action::PlayerAction::EndTurn
+            | crate::action::PlayerAction::Noop => break,
+            action => match apply_action(state, ai_slot, &action) {
+                Ok(_) => applied += 1,
+                // A rejected action with no state change would loop forever —
+                // stop the turn rather than spin.
+                Err(_) => break,
+            },
+        }
+    }
+    applied
+}
+
 /// Derive a deterministic per-turn rng seed for `ai_slot`.
 ///
 /// Pure function of `(turn, slot)` — no read of mutable per-turn state.
diff --git a/src/simulator/crates/mc-player-api/src/lib.rs b/src/simulator/crates/mc-player-api/src/lib.rs index ab4e6115..2d43e7cb 100644 --- a/src/simulator/crates/mc-player-api/src/lib.rs +++ b/src/simulator/crates/mc-player-api/src/lib.rs @@ -21,6 +21,7 @@ pub mod comms_dispatch; pub mod controllers; pub mod dispatch; pub mod error; +pub mod learned; pub mod projection; pub mod view; pub mod wire; @@ -30,6 +31,10 @@ pub use controllers::{ ScriptedController, DEFAULT_CONTROLLER_ID, }; pub use dispatch::{apply_action, apply_ai_action, suggest_actions}; +pub use learned::{ + decide_action, is_learned_controller, register_learned_controllers, Decision, + LearnedController, LEARNED_CONTROLLER_ID, +}; pub use projection::{ project_tactical, project_tactical_with_vision, project_view, project_view_with_vision, };