feat(mc-player-api): Add routing logic for learned controller slots in dispatch system

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
autocommit 2026-06-03 04:06:43 -07:00
parent 20e7788da5
commit 11c9b71a02
2 changed files with 67 additions and 0 deletions

View file

@ -981,6 +981,12 @@ fn drive_ai_slot(state: &mut GameState, ai_slot: u8) -> u32 {
// `drive_controller_turn` so legacy fixtures without
// `controller_id` set keep working unchanged.
let controller_id = state.players[pi].controller_id.clone();
// p1-29f — `learned:*` slots run the trained ONNX policy via a re-observing
// view-loop (mirroring the training harness `view`/`act` loop), NOT the
// one-shot tactical `decide_turn`. Route them to `drive_learned_slot`.
if crate::learned::is_learned_controller(&controller_id) {
return drive_learned_slot(state, ai_slot);
}
let actions = crate::controllers::drive_controller_turn(
&controller_id,
&tactical,
@ -1003,6 +1009,62 @@ fn drive_ai_slot(state: &mut GameState, ai_slot: u8) -> u32 {
applied
}
/// Hard cap on actions applied for one learned slot's turn. The policy was
/// trained with a per-episode step budget; a runaway loop (e.g. the policy
/// repeatedly picking a no-op-equivalent legal action that never advances the
/// turn) must terminate. 256 covers any legitimate turn for a duel-scale game
/// (max 16 units * a few orders + per-city builds) with wide margin.
///
/// `drive_learned_slot` uses this as the upper bound of its decide/apply
/// loop, so it limits loop iterations, each of which applies at most one
/// action.
const LEARNED_MAX_ACTIONS_PER_TURN: u32 = 256;
/// Drive one turn for a `learned:*` slot by running the trained ONNX policy
/// through the same `project_view -> decide -> apply_action -> re-project`
/// loop the policy trained against (p1-29f).
///
/// Actions are applied via [`apply_action`] — the path training used for the
/// policy's OWN actions (the harness `apply_action_json`) — NOT
/// `apply_ai_action`, so re-projected observations match training.
///
/// The turn ends as soon as any of the following happens:
/// - the policy picks `EndTurn` or `Noop`;
/// - no per-player vision is available for the slot;
/// - `decide_action` or `apply_action` returns an error;
/// - [`LEARNED_MAX_ACTIONS_PER_TURN`] loop iterations have run.
///
/// Returns the number of `PlayerAction`s applied (excluding the terminal
/// `end_turn`, which `apply_end_turn` issues on the dispatcher's behalf — we
/// stop the loop instead of applying it here to avoid recursing the AI
/// driver). Returns 0 without applying anything when `ai_slot` is out of
/// range for `state.players` or the shared policy is unavailable.
fn drive_learned_slot(state: &mut GameState, ai_slot: u8) -> u32 {
    // Unknown slot index — nothing to drive.
    if usize::from(ai_slot) >= state.players.len() {
        return 0;
    }
    // Artifact unavailable — slot passes its turn.
    let Some(net) = crate::learned::shared_learned_policy() else {
        return 0;
    };
    // The vision catalog does not depend on anything the loop mutates, so
    // build it once instead of once per applied action.
    let catalog = mc_vision::VisionCatalog::default();
    let mut applied: u32 = 0;
    for _ in 0..LEARNED_MAX_ACTIONS_PER_TURN {
        // Fog-aware projection, matching `drive_ai_slot` and the training
        // harness default (`CP_OMNISCIENT=0`). Recomputed every iteration
        // because the previous action may have changed what the slot sees.
        let vision_state = mc_vision::compute_vision(state, &catalog, None);
        // No vision for this slot — nothing to decide.
        let Some(pv) = vision_state.for_player(ai_slot) else {
            break;
        };
        let view = crate::projection::project_view_with_vision(state, ai_slot, false, pv);
        // Inference failure ends the turn; actions already applied stand.
        let Ok(decision) = crate::learned::decide_action(net, &view) else {
            break;
        };
        let action = decision.action;
        // The policy chose to end its turn (or has no legal action). Stop;
        // `apply_end_turn` advances the slot rotation for us.
        if matches!(
            action,
            crate::action::PlayerAction::EndTurn | crate::action::PlayerAction::Noop
        ) {
            break;
        }
        // A rejected action with no state change would loop forever — stop
        // the turn rather than spin.
        if apply_action(state, ai_slot, &action).is_err() {
            break;
        }
        applied += 1;
    }
    applied
}
/// Derive a deterministic per-turn rng seed for `ai_slot`.
///
/// Pure function of `(turn, slot)` — no read of mutable per-turn state.

View file

@ -21,6 +21,7 @@ pub mod comms_dispatch;
pub mod controllers;
pub mod dispatch;
pub mod error;
pub mod learned;
pub mod projection;
pub mod view;
pub mod wire;
@ -30,6 +31,10 @@ pub use controllers::{
ScriptedController, DEFAULT_CONTROLLER_ID,
};
pub use dispatch::{apply_action, apply_ai_action, suggest_actions};
pub use learned::{
decide_action, is_learned_controller, register_learned_controllers, Decision,
LearnedController, LEARNED_CONTROLLER_ID,
};
pub use projection::{
project_tactical, project_tactical_with_vision, project_view, project_view_with_vision,
};