feat(mc-player-ai): Introduce LearnedBehavior module, LearnedController, and DispatchSystem for AI behavior routing and decision-making

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
autocommit 2026-06-04 09:26:32 -07:00
parent 10461b647f
commit ac362475eb
5 changed files with 34 additions and 6 deletions

View file

@ -13,7 +13,7 @@ use std::collections::HashMap;
use std::sync::{OnceLock, RwLock};
use mc_ai::evaluator::ScoringWeights;
use mc_ai::tactical::{Action, TacticalState};
use mc_ai::tactical::{Action, TacticalMemory, TacticalState};
/// Default controller id stamped on every AI slot at game-setup time.
/// `dispatch::drive_ai_slot` falls back to this id when a slot's
@ -59,12 +59,21 @@ pub trait AiController: Send + Sync {
/// Decide the action chain this slot wants to execute this turn.
///
/// The parameter list mirrors `mc_ai::tactical::run_ai_turn`, so the
/// scripted wrapper is a one-line delegate.
///
/// `memory` is the slot's persistent [`TacticalMemory`] (p1-29h) — the
/// cross-turn channel for army-level target-lock + commitment hysteresis.
/// Controllers that don't use it simply ignore the argument; the scripted
/// default threads it into `run_ai_turn`.
///
/// Whether mutations to `memory` persist is decided by the caller: the
/// dispatch layer lends out the slot's `PlayerState::tactical_memory` and
/// stores it back after the call, so changes survive the per-turn
/// `TacticalState` snapshot boundary with no GDScript shadow state, whereas
/// read-only probes such as `suggest_actions` pass a throwaway clone whose
/// mutations are discarded.
///
/// Returns the chosen actions; the dispatch layer applies them in
/// iteration order.
fn decide_turn(
&self,
state: &TacticalState,
slot: u8,
weights: &ScoringWeights,
seed: u64,
memory: &mut TacticalMemory,
) -> Vec<Action>;
/// Identity card for save / replay / lobby integrity checks.
@ -82,8 +91,9 @@ impl AiController for ScriptedController {
slot: u8,
weights: &ScoringWeights,
seed: u64,
memory: &mut TacticalMemory,
) -> Vec<Action> {
mc_ai::tactical::run_ai_turn(state, slot, weights, seed)
mc_ai::tactical::run_ai_turn(state, slot, weights, seed, memory)
}
fn ident(&self) -> AiControllerIdent {
@ -129,6 +139,7 @@ pub fn drive_controller_turn(
slot: u8,
weights: &ScoringWeights,
seed: u64,
memory: &mut TacticalMemory,
) -> Vec<Action> {
let guard = registry()
.read()
@ -141,7 +152,7 @@ pub fn drive_controller_turn(
guard
.get(lookup_id)
.expect("default controller must always be registered")
.decide_turn(state, slot, weights, seed)
.decide_turn(state, slot, weights, seed, memory)
}
/// List every registered controller id. Stage 8's game-setup UI uses
@ -168,6 +179,7 @@ mod tests {
_slot: u8,
_weights: &ScoringWeights,
_seed: u64,
_memory: &mut TacticalMemory,
) -> Vec<Action> {
Vec::new()
}

View file

@ -1007,13 +1007,21 @@ fn drive_ai_slot(state: &mut GameState, ai_slot: u8) -> u32 {
if crate::learned::is_learned_controller(&controller_id) {
return drive_learned_slot(state, ai_slot);
}
// p1-29h — move the slot's persistent tactical memory out with
// `std::mem::take` (leaving a `Default` placeholder behind), lend it `&mut`
// to the controller, and store it back afterwards, so the army-level
// target-lock + commitment hysteresis survives this turn's `TacticalState`
// snapshot (which is rebuilt fresh every turn). Because the value is moved
// out rather than borrowed, no borrow of `state` is held across the
// controller call.
let mut memory = std::mem::take(&mut state.players[pi].tactical_memory);
let actions = crate::controllers::drive_controller_turn(
&controller_id,
&tactical,
ai_slot,
&weights,
seed,
&mut memory,
);
state.players[pi].tactical_memory = memory;
let mut applied: u32 = 0;
for action in actions {
match apply_ai_action(state, ai_slot, action) {
@ -1354,12 +1362,19 @@ pub fn suggest_actions(state: &GameState, slot: PlayerId) -> Vec<PlayerAction> {
let weights = state.players[pi].scoring_weights.clone();
let seed = seed_for_ai_turn(state.turn, slot);
let controller_id = state.players[pi].controller_id.clone();
// p1-29h — `suggest_actions` is read-only (`&GameState`), so probe with a
// CLONE of the slot's tactical memory: the suggestion reflects the current
// commitment/lock state without mutating it (a suggestion must not advance
// the hysteresis timer). Determinism holds because the clone starts from
// the same persisted state every call.
let mut memory = state.players[pi].tactical_memory.clone();
let actions = crate::controllers::drive_controller_turn(
&controller_id,
&tactical,
slot,
&weights,
seed,
&mut memory,
);
actions
.iter()

View file

@ -35,7 +35,7 @@ use crate::view::PlayerView;
use encoder::{decode_action_index, encode_legal_actions, encode_observation};
use inference::{masked_argmax, top_k, PolicyError, PolicyNet};
use mc_ai::evaluator::ScoringWeights;
use mc_ai::tactical::{Action, TacticalState};
use mc_ai::tactical::{Action, TacticalMemory, TacticalState};
/// Registry key for the in-box learned controller.
pub const LEARNED_CONTROLLER_ID: &str = "learned:duel-v4-encfix-s7";
@ -133,6 +133,7 @@ impl AiController for LearnedController {
_slot: u8,
_weights: &ScoringWeights,
_seed: u64,
_memory: &mut TacticalMemory,
) -> Vec<Action> {
// The policy operates on `PlayerView`, not `TacticalState`, and runs a
// re-observing loop that needs `&mut GameState`. That loop lives in

View file

@ -1461,7 +1461,7 @@ fn drive_strong_claude_game(
tactical.current_player = 0;
let seed = seed_for_claude_turn(turn);
let actions =
mc_ai::tactical::run_ai_turn(&tactical, 0, &claude_weights, seed);
mc_ai::tactical::run_ai_turn(&tactical, 0, &claude_weights, seed, &mut mc_ai::tactical::TacticalMemory::default());
let mut took_real_action = false;
for action in actions {

View file

@ -90,7 +90,7 @@ fn probe_raw_ai_actions(state: &GameState, ai_slot: u8, turn: u32) -> usize {
let seed = (turn as u64)
.wrapping_mul(0x9E37_79B9_7F4A_7C15)
.wrapping_add(ai_slot as u64);
let actions = mc_ai::tactical::run_ai_turn(&tactical, ai_slot, &weights, seed);
let actions = mc_ai::tactical::run_ai_turn(&tactical, ai_slot, &weights, seed, &mut mc_ai::tactical::TacticalMemory::default());
let mut kinds: Vec<&'static str> = Vec::with_capacity(actions.len());
for a in &actions {
kinds.push(match a {