feat(@projects/@magic-civilization): ✨ add recording variant for learned slot actions
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
a804ceb430
commit
a5de8ad517
2 changed files with 30 additions and 5 deletions
|
|
@ -1054,15 +1054,37 @@ const LEARNED_MAX_ACTIONS_PER_TURN: u32 = 256;
|
|||
/// behalf — we stop the loop instead of applying it here to avoid recursing
|
||||
/// the AI driver).
|
||||
fn drive_learned_slot(state: &mut GameState, ai_slot: u8) -> u32 {
|
||||
// p1-29k — thin wrapper over the recording variant; the player-API world
|
||||
// discards the applied-action log (behaviour-identical to the pre-split
|
||||
// loop). The autoplay surface uses `drive_learned_slot_recording` directly
|
||||
// so its GDScript reconciler can replay the policy's chosen actions.
|
||||
drive_learned_slot_recording(state, ai_slot).0
|
||||
}
|
||||
|
||||
/// p1-29k — the learned-slot policy loop, returning the ordered list of
|
||||
/// `PlayerAction`s it actually applied (the terminal `EndTurn`/`Noop` that
|
||||
/// stops the loop is NOT included — it is not an applied mutation). This is
|
||||
/// the single source of truth for the loop body; `drive_learned_slot` calls
|
||||
/// it and discards the log, so the player-API world is unchanged. The
|
||||
/// autoplay surface (`GdGameState::run_learned_slot`) consumes the log to
|
||||
/// reconcile the Rust post-turn state back into its GDScript entities.
|
||||
///
|
||||
/// `.0` is the count applied (== `log.len()`); `.1` is the log. Both are
|
||||
/// returned so callers that only want the count need not call `.len()`.
|
||||
pub fn drive_learned_slot_recording(
|
||||
state: &mut GameState,
|
||||
ai_slot: u8,
|
||||
) -> (u32, Vec<crate::action::PlayerAction>) {
|
||||
let pi = ai_slot as usize;
|
||||
if pi >= state.players.len() {
|
||||
return 0;
|
||||
return (0, Vec::new());
|
||||
}
|
||||
let net = match crate::learned::shared_learned_policy() {
|
||||
Some(net) => net,
|
||||
None => return 0, // artifact unavailable — slot passes its turn.
|
||||
None => return (0, Vec::new()), // artifact unavailable — slot passes its turn.
|
||||
};
|
||||
let mut applied: u32 = 0;
|
||||
let mut log: Vec<crate::action::PlayerAction> = Vec::new();
|
||||
for _ in 0..LEARNED_MAX_ACTIONS_PER_TURN {
|
||||
// Fog-aware projection, matching `drive_ai_slot` and the training
|
||||
// harness default (`CP_OMNISCIENT=0`).
|
||||
|
|
@ -1083,14 +1105,17 @@ fn drive_learned_slot(state: &mut GameState, ai_slot: u8) -> u32 {
|
|||
crate::action::PlayerAction::EndTurn
|
||||
| crate::action::PlayerAction::Noop => break,
|
||||
action => match apply_action(state, ai_slot, &action) {
|
||||
Ok(_) => applied += 1,
|
||||
Ok(_) => {
|
||||
applied += 1;
|
||||
log.push(action);
|
||||
}
|
||||
// A rejected action with no state change would loop forever —
|
||||
// stop the turn rather than spin.
|
||||
Err(_) => break,
|
||||
},
|
||||
}
|
||||
}
|
||||
applied
|
||||
(applied, log)
|
||||
}
|
||||
|
||||
/// Derive a deterministic per-turn rng seed for `ai_slot`.
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ pub use controllers::{
|
|||
register_controller, registered_ids, AiController, AiControllerIdent, SandboxKind,
|
||||
ScriptedController, DEFAULT_CONTROLLER_ID,
|
||||
};
|
||||
pub use dispatch::{apply_action, apply_ai_action, suggest_actions};
|
||||
pub use dispatch::{apply_action, apply_ai_action, drive_learned_slot_recording, suggest_actions};
|
||||
pub use learned::{
|
||||
decide_action, is_learned_controller, register_learned_controllers, Decision,
|
||||
LearnedController, LEARNED_CONTROLLER_ID,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue