feat(mc-player-api): ✨ Add routing logic for learned controller slots in dispatch system
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
20e7788da5
commit
11c9b71a02
2 changed files with 67 additions and 0 deletions
|
|
@ -981,6 +981,12 @@ fn drive_ai_slot(state: &mut GameState, ai_slot: u8) -> u32 {
|
|||
// `drive_controller_turn` so legacy fixtures without
|
||||
// `controller_id` set keep working unchanged.
|
||||
let controller_id = state.players[pi].controller_id.clone();
|
||||
// p1-29f — `learned:*` slots run the trained ONNX policy via a re-observing
|
||||
// view-loop (mirroring the training harness `view`/`act` loop), NOT the
|
||||
// one-shot tactical `decide_turn`. Route them to `drive_learned_slot`.
|
||||
if crate::learned::is_learned_controller(&controller_id) {
|
||||
return drive_learned_slot(state, ai_slot);
|
||||
}
|
||||
let actions = crate::controllers::drive_controller_turn(
|
||||
&controller_id,
|
||||
&tactical,
|
||||
|
|
@ -1003,6 +1009,62 @@ fn drive_ai_slot(state: &mut GameState, ai_slot: u8) -> u32 {
|
|||
applied
|
||||
}
|
||||
|
||||
/// Hard cap on actions applied for one learned slot's turn. The policy was
|
||||
/// trained with a per-episode step budget; a runaway loop (e.g. the policy
|
||||
/// repeatedly picking a no-op-equivalent legal action that never advances the
|
||||
/// turn) must terminate. 256 covers any legitimate turn for a duel-scale game
|
||||
/// (max 16 units * a few orders + per-city builds) with wide margin.
|
||||
/// Enforced as the iteration bound of the action loop in `drive_learned_slot`;
/// once the bound is reached that function returns the count applied so far.
const LEARNED_MAX_ACTIONS_PER_TURN: u32 = 256;
|
||||
|
||||
/// Drive one turn for a `learned:*` slot by running the trained ONNX policy
|
||||
/// through the same `project_view -> decide -> apply_action -> re-project`
|
||||
/// loop the policy trained against (p1-29f). Applies via [`apply_action`] —
|
||||
/// the path training used for the policy's OWN actions (the harness
|
||||
/// `apply_action_json`) — NOT `apply_ai_action`, so re-projected observations
|
||||
/// match training. Returns the number of `PlayerAction`s applied (excluding
|
||||
/// the terminal `end_turn`, which `apply_end_turn` issues on the dispatcher's
|
||||
/// behalf — we stop the loop instead of applying it here to avoid recursing
|
||||
/// the AI driver).
|
||||
fn drive_learned_slot(state: &mut GameState, ai_slot: u8) -> u32 {
|
||||
let pi = ai_slot as usize;
|
||||
if pi >= state.players.len() {
|
||||
return 0;
|
||||
}
|
||||
let net = match crate::learned::shared_learned_policy() {
|
||||
Some(net) => net,
|
||||
None => return 0, // artifact unavailable — slot passes its turn.
|
||||
};
|
||||
let mut applied: u32 = 0;
|
||||
for _ in 0..LEARNED_MAX_ACTIONS_PER_TURN {
|
||||
// Fog-aware projection, matching `drive_ai_slot` and the training
|
||||
// harness default (`CP_OMNISCIENT=0`).
|
||||
let vision_state =
|
||||
mc_vision::compute_vision(state, &mc_vision::VisionCatalog::default(), None);
|
||||
let pv = match vision_state.for_player(ai_slot) {
|
||||
Some(pv) => pv,
|
||||
None => break, // no vision for this slot — nothing to decide.
|
||||
};
|
||||
let view = crate::projection::project_view_with_vision(state, ai_slot, false, pv);
|
||||
let decision = match crate::learned::decide_action(net, &view) {
|
||||
Ok(d) => d,
|
||||
Err(_) => break,
|
||||
};
|
||||
match decision.action {
|
||||
// The policy chose to end its turn (or has no legal action). Stop;
|
||||
// `apply_end_turn` advances the slot rotation for us.
|
||||
crate::action::PlayerAction::EndTurn
|
||||
| crate::action::PlayerAction::Noop => break,
|
||||
action => match apply_action(state, ai_slot, &action) {
|
||||
Ok(_) => applied += 1,
|
||||
// A rejected action with no state change would loop forever —
|
||||
// stop the turn rather than spin.
|
||||
Err(_) => break,
|
||||
},
|
||||
}
|
||||
}
|
||||
applied
|
||||
}
|
||||
|
||||
/// Derive a deterministic per-turn rng seed for `ai_slot`.
|
||||
///
|
||||
/// Pure function of `(turn, slot)` — no read of mutable per-turn state.
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ pub mod comms_dispatch;
|
|||
pub mod controllers;
|
||||
pub mod dispatch;
|
||||
pub mod error;
|
||||
pub mod learned;
|
||||
pub mod projection;
|
||||
pub mod view;
|
||||
pub mod wire;
|
||||
|
|
@ -30,6 +31,10 @@ pub use controllers::{
|
|||
ScriptedController, DEFAULT_CONTROLLER_ID,
|
||||
};
|
||||
pub use dispatch::{apply_action, apply_ai_action, suggest_actions};
|
||||
pub use learned::{
|
||||
decide_action, is_learned_controller, register_learned_controllers, Decision,
|
||||
LearnedController, LEARNED_CONTROLLER_ID,
|
||||
};
|
||||
pub use projection::{
|
||||
project_tactical, project_tactical_with_vision, project_view, project_view_with_vision,
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue