feat(mc-player-api): ✨ Thread persistent TacticalMemory through AiController::decide_turn, drive_controller_turn, and the dispatch callers

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-06-04 09:26:32 -07:00 · 2026-06-04 09:26:32 -07:00 · ac362475eb
commit ac362475eb
parent 10461b647f
5 changed files with 34 additions and 6 deletions
--- a/src/simulator/crates/mc-player-api/src/controllers.rs
+++ b/src/simulator/crates/mc-player-api/src/controllers.rs
@ -13,7 +13,7 @@ use std::collections::HashMap;
 use std::sync::{OnceLock, RwLock};

 use mc_ai::evaluator::ScoringWeights;
-use mc_ai::tactical::{Action, TacticalState};
+use mc_ai::tactical::{Action, TacticalMemory, TacticalState};

 /// Default controller id stamped on every AI slot at game-setup time.
 /// `dispatch::drive_ai_slot` falls back to this id when a slot's
@ -59,12 +59,21 @@ pub trait AiController: Send + Sync {
    /// Decide the action chain this slot wants to execute this turn.
    /// Mirrors the existing `mc_ai::tactical::run_ai_turn` signature so
    /// the scripted wrapper is a one-line delegate.
+    ///
+    /// `memory` is the slot's persistent [`TacticalMemory`] (p1-29h) — the
+    /// cross-turn channel for army-level target-lock + commitment hysteresis.
+    /// Controllers that don't use it simply ignore the argument; the scripted
+    /// default threads it into `run_ai_turn`. `drive_ai_slot` moves it out of
+    /// `PlayerState::tactical_memory` and writes it back after the call, so
+    /// mutations survive the per-turn `TacticalState` snapshot boundary with
+    /// no GDScript shadow state; `suggest_actions` passes a throwaway clone.
    fn decide_turn(
        &self,
        state: &TacticalState,
        slot: u8,
        weights: &ScoringWeights,
        seed: u64,
+        memory: &mut TacticalMemory,
    ) -> Vec<Action>;

    /// Identity card for save / replay / lobby integrity checks.
@ -82,8 +91,9 @@ impl AiController for ScriptedController {
        slot: u8,
        weights: &ScoringWeights,
        seed: u64,
+        memory: &mut TacticalMemory,
    ) -> Vec<Action> {
-        mc_ai::tactical::run_ai_turn(state, slot, weights, seed)
+        mc_ai::tactical::run_ai_turn(state, slot, weights, seed, memory)
    }

    fn ident(&self) -> AiControllerIdent {
@ -129,6 +139,7 @@ pub fn drive_controller_turn(
    slot: u8,
    weights: &ScoringWeights,
    seed: u64,
+    memory: &mut TacticalMemory,
 ) -> Vec<Action> {
    let guard = registry()
        .read()
@ -141,7 +152,7 @@ pub fn drive_controller_turn(
    guard
        .get(lookup_id)
        .expect("default controller must always be registered")
-        .decide_turn(state, slot, weights, seed)
+        .decide_turn(state, slot, weights, seed, memory)
 }

 /// List every registered controller id. Stage 8's game-setup UI uses
@ -168,6 +179,7 @@ mod tests {
            _slot: u8,
            _weights: &ScoringWeights,
            _seed: u64,
+            _memory: &mut TacticalMemory,
        ) -> Vec<Action> {
            Vec::new()
        }
--- a/src/simulator/crates/mc-player-api/src/dispatch.rs
+++ b/src/simulator/crates/mc-player-api/src/dispatch.rs
@ -1007,13 +1007,21 @@ fn drive_ai_slot(state: &mut GameState, ai_slot: u8) -> u32 {
    if crate::learned::is_learned_controller(&controller_id) {
        return drive_learned_slot(state, ai_slot);
    }
+    // p1-29h — move the slot's persistent tactical memory out via `mem::take`
+    // (it is written back right after the call) so the army-level target-lock
+    // + commitment hysteresis survives this turn's `TacticalState` snapshot,
+    // which is rebuilt fresh every turn. `tactical`, `weights`, and
+    // `controller_id` are owned/cloned by here, so no borrow of `state` is live.
+    let mut memory = std::mem::take(&mut state.players[pi].tactical_memory);
    let actions = crate::controllers::drive_controller_turn(
        &controller_id,
        &tactical,
        ai_slot,
        &weights,
        seed,
+        &mut memory,
    );
+    state.players[pi].tactical_memory = memory;
    let mut applied: u32 = 0;
    for action in actions {
        match apply_ai_action(state, ai_slot, action) {
@ -1354,12 +1362,19 @@ pub fn suggest_actions(state: &GameState, slot: PlayerId) -> Vec<PlayerAction> {
    let weights = state.players[pi].scoring_weights.clone();
    let seed = seed_for_ai_turn(state.turn, slot);
    let controller_id = state.players[pi].controller_id.clone();
+    // p1-29h — `suggest_actions` is read-only (`&GameState`), so probe with a
+    // CLONE of the slot's tactical memory: the suggestion reflects the current
+    // commitment/lock state without mutating it (a suggestion must not advance
+    // the hysteresis timer). Determinism holds because the clone starts from
+    // the same persisted state every call.
+    let mut memory = state.players[pi].tactical_memory.clone();
    let actions = crate::controllers::drive_controller_turn(
        &controller_id,
        &tactical,
        slot,
        &weights,
        seed,
+        &mut memory,
    );
    actions
        .iter()
--- a/src/simulator/crates/mc-player-api/src/learned/mod.rs
+++ b/src/simulator/crates/mc-player-api/src/learned/mod.rs
@ -35,7 +35,7 @@ use crate::view::PlayerView;
 use encoder::{decode_action_index, encode_legal_actions, encode_observation};
 use inference::{masked_argmax, top_k, PolicyError, PolicyNet};
 use mc_ai::evaluator::ScoringWeights;
-use mc_ai::tactical::{Action, TacticalState};
+use mc_ai::tactical::{Action, TacticalMemory, TacticalState};

 /// Registry key for the in-box learned controller.
 pub const LEARNED_CONTROLLER_ID: &str = "learned:duel-v4-encfix-s7";
@ -133,6 +133,7 @@ impl AiController for LearnedController {
        _slot: u8,
        _weights: &ScoringWeights,
        _seed: u64,
+        _memory: &mut TacticalMemory,
    ) -> Vec<Action> {
        // The policy operates on `PlayerView`, not `TacticalState`, and runs a
        // re-observing loop that needs `&mut GameState`. That loop lives in
--- a/src/simulator/crates/mc-player-api/tests/full_game_transcript.rs
+++ b/src/simulator/crates/mc-player-api/tests/full_game_transcript.rs
@ -1461,7 +1461,7 @@ fn drive_strong_claude_game(
        tactical.current_player = 0;
        let seed = seed_for_claude_turn(turn);
        let actions =
-            mc_ai::tactical::run_ai_turn(&tactical, 0, &claude_weights, seed);
+            mc_ai::tactical::run_ai_turn(&tactical, 0, &claude_weights, seed, &mut mc_ai::tactical::TacticalMemory::default());

        let mut took_real_action = false;
        for action in actions {
--- a/src/simulator/crates/mc-player-api/tests/smoke_5_endturn_mock.rs
+++ b/src/simulator/crates/mc-player-api/tests/smoke_5_endturn_mock.rs
@ -90,7 +90,7 @@ fn probe_raw_ai_actions(state: &GameState, ai_slot: u8, turn: u32) -> usize {
    let seed = (turn as u64)
        .wrapping_mul(0x9E37_79B9_7F4A_7C15)
        .wrapping_add(ai_slot as u64);
-    let actions = mc_ai::tactical::run_ai_turn(&tactical, ai_slot, &weights, seed);
+    let actions = mc_ai::tactical::run_ai_turn(&tactical, ai_slot, &weights, seed, &mut mc_ai::tactical::TacticalMemory::default());
    let mut kinds: Vec<&'static str> = Vec::with_capacity(actions.len());
    for a in &actions {
        kinds.push(match a {