From ac362475eba17d7721414c28b4c5332f907933cd Mon Sep 17 00:00:00 2001 From: autocommit Date: Thu, 4 Jun 2026 09:26:32 -0700 Subject: [PATCH] =?UTF-8?q?feat(mc-player-ai):=20=E2=9C=A8=20Introduce=20L?= =?UTF-8?q?earnedBehavior=20module,=20LearnedController,=20and=20DispatchS?= =?UTF-8?q?ystem=20for=20AI=20behavior=20routing=20and=20decision-making?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Lilith Autocommit --- .../crates/mc-player-api/src/controllers.rs | 18 +++++++++++++++--- .../crates/mc-player-api/src/dispatch.rs | 15 +++++++++++++++ .../crates/mc-player-api/src/learned/mod.rs | 3 ++- .../tests/full_game_transcript.rs | 2 +- .../tests/smoke_5_endturn_mock.rs | 2 +- 5 files changed, 34 insertions(+), 6 deletions(-) diff --git a/src/simulator/crates/mc-player-api/src/controllers.rs b/src/simulator/crates/mc-player-api/src/controllers.rs index 5a06e174..65ff70cf 100644 --- a/src/simulator/crates/mc-player-api/src/controllers.rs +++ b/src/simulator/crates/mc-player-api/src/controllers.rs @@ -13,7 +13,7 @@ use std::collections::HashMap; use std::sync::{OnceLock, RwLock}; use mc_ai::evaluator::ScoringWeights; -use mc_ai::tactical::{Action, TacticalState}; +use mc_ai::tactical::{Action, TacticalMemory, TacticalState}; /// Default controller id stamped on every AI slot at game-setup time. /// `dispatch::drive_ai_slot` falls back to this id when a slot's @@ -59,12 +59,21 @@ pub trait AiController: Send + Sync { /// Decide the action chain this slot wants to execute this turn. /// Mirrors the existing `mc_ai::tactical::run_ai_turn` signature so /// the scripted wrapper is a one-line delegate. + /// + /// `memory` is the slot's persistent [`TacticalMemory`] (p1-29h) — the + /// cross-turn channel for army-level target-lock + commitment hysteresis. + /// Controllers that don't use it simply ignore the argument; the scripted + /// default threads it into `run_ai_turn`. 
Borrowed `&mut` from + /// `PlayerState::tactical_memory` by the dispatch layer so mutations + /// survive the per-turn `TacticalState` snapshot boundary with no GDScript + /// shadow state. fn decide_turn( &self, state: &TacticalState, slot: u8, weights: &ScoringWeights, seed: u64, + memory: &mut TacticalMemory, ) -> Vec; /// Identity card for save / replay / lobby integrity checks. @@ -82,8 +91,9 @@ impl AiController for ScriptedController { slot: u8, weights: &ScoringWeights, seed: u64, + memory: &mut TacticalMemory, ) -> Vec { - mc_ai::tactical::run_ai_turn(state, slot, weights, seed) + mc_ai::tactical::run_ai_turn(state, slot, weights, seed, memory) } fn ident(&self) -> AiControllerIdent { @@ -129,6 +139,7 @@ pub fn drive_controller_turn( slot: u8, weights: &ScoringWeights, seed: u64, + memory: &mut TacticalMemory, ) -> Vec { let guard = registry() .read() @@ -141,7 +152,7 @@ pub fn drive_controller_turn( guard .get(lookup_id) .expect("default controller must always be registered") - .decide_turn(state, slot, weights, seed) + .decide_turn(state, slot, weights, seed, memory) } /// List every registered controller id. Stage 8's game-setup UI uses @@ -168,6 +179,7 @@ mod tests { _slot: u8, _weights: &ScoringWeights, _seed: u64, + _memory: &mut TacticalMemory, ) -> Vec { Vec::new() } diff --git a/src/simulator/crates/mc-player-api/src/dispatch.rs b/src/simulator/crates/mc-player-api/src/dispatch.rs index db4ea551..8ac24282 100644 --- a/src/simulator/crates/mc-player-api/src/dispatch.rs +++ b/src/simulator/crates/mc-player-api/src/dispatch.rs @@ -1007,13 +1007,21 @@ fn drive_ai_slot(state: &mut GameState, ai_slot: u8) -> u32 { if crate::learned::is_learned_controller(&controller_id) { return drive_learned_slot(state, ai_slot); } + // p1-29h — borrow the slot's persistent tactical memory `&mut` so the + // army-level target-lock + commitment hysteresis survives this turn's + // `TacticalState` snapshot (which is rebuilt fresh every turn). 
`tactical`, + // `weights`, and `controller_id` are all owned/cloned by here, so no + // borrow of `state` is live; memory is moved out via `mem::take` and restored after the call. + let mut memory = std::mem::take(&mut state.players[pi].tactical_memory); let actions = crate::controllers::drive_controller_turn( &controller_id, &tactical, ai_slot, &weights, seed, + &mut memory, ); + state.players[pi].tactical_memory = memory; let mut applied: u32 = 0; for action in actions { match apply_ai_action(state, ai_slot, action) { @@ -1354,12 +1362,19 @@ pub fn suggest_actions(state: &GameState, slot: PlayerId) -> Vec { let weights = state.players[pi].scoring_weights.clone(); let seed = seed_for_ai_turn(state.turn, slot); let controller_id = state.players[pi].controller_id.clone(); + // p1-29h — `suggest_actions` is read-only (`&GameState`), so probe with a + // CLONE of the slot's tactical memory: the suggestion reflects the current + // commitment/lock state without mutating it (a suggestion must not advance + // the hysteresis timer). Determinism holds because the clone starts from + // the same persisted state every call. + let mut memory = state.players[pi].tactical_memory.clone(); let actions = crate::controllers::drive_controller_turn( &controller_id, &tactical, slot, &weights, seed, + &mut memory, ); actions .iter() diff --git a/src/simulator/crates/mc-player-api/src/learned/mod.rs b/src/simulator/crates/mc-player-api/src/learned/mod.rs index 79e5d8a3..f06b94c2 100644 --- a/src/simulator/crates/mc-player-api/src/learned/mod.rs +++ b/src/simulator/crates/mc-player-api/src/learned/mod.rs @@ -35,7 +35,7 @@ use crate::view::PlayerView; use encoder::{decode_action_index, encode_legal_actions, encode_observation}; use inference::{masked_argmax, top_k, PolicyError, PolicyNet}; use mc_ai::evaluator::ScoringWeights; -use mc_ai::tactical::{Action, TacticalState}; +use mc_ai::tactical::{Action, TacticalMemory, TacticalState}; /// Registry key for the in-box learned controller. 
pub const LEARNED_CONTROLLER_ID: &str = "learned:duel-v4-encfix-s7"; @@ -133,6 +133,7 @@ impl AiController for LearnedController { _slot: u8, _weights: &ScoringWeights, _seed: u64, + _memory: &mut TacticalMemory, ) -> Vec { // The policy operates on `PlayerView`, not `TacticalState`, and runs a // re-observing loop that needs `&mut GameState`. That loop lives in diff --git a/src/simulator/crates/mc-player-api/tests/full_game_transcript.rs b/src/simulator/crates/mc-player-api/tests/full_game_transcript.rs index dcb39806..cc10b044 100644 --- a/src/simulator/crates/mc-player-api/tests/full_game_transcript.rs +++ b/src/simulator/crates/mc-player-api/tests/full_game_transcript.rs @@ -1461,7 +1461,7 @@ fn drive_strong_claude_game( tactical.current_player = 0; let seed = seed_for_claude_turn(turn); let actions = - mc_ai::tactical::run_ai_turn(&tactical, 0, &claude_weights, seed); + mc_ai::tactical::run_ai_turn(&tactical, 0, &claude_weights, seed, &mut mc_ai::tactical::TacticalMemory::default()); let mut took_real_action = false; for action in actions { diff --git a/src/simulator/crates/mc-player-api/tests/smoke_5_endturn_mock.rs b/src/simulator/crates/mc-player-api/tests/smoke_5_endturn_mock.rs index 36f93626..9a1b95ed 100644 --- a/src/simulator/crates/mc-player-api/tests/smoke_5_endturn_mock.rs +++ b/src/simulator/crates/mc-player-api/tests/smoke_5_endturn_mock.rs @@ -90,7 +90,7 @@ fn probe_raw_ai_actions(state: &GameState, ai_slot: u8, turn: u32) -> usize { let seed = (turn as u64) .wrapping_mul(0x9E37_79B9_7F4A_7C15) .wrapping_add(ai_slot as u64); - let actions = mc_ai::tactical::run_ai_turn(&tactical, ai_slot, &weights, seed); + let actions = mc_ai::tactical::run_ai_turn(&tactical, ai_slot, &weights, seed, &mut mc_ai::tactical::TacticalMemory::default()); let mut kinds: Vec<&'static str> = Vec::with_capacity(actions.len()); for a in &actions { kinds.push(match a {