feat(mc-turn): ✨ Implement turn-based processing logic for structured game actions, policies, and formations in GameState, Processor, and BuildingActionHandlers
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
9c0a981bf3
commit
ae9de3186f
2 changed files with 699 additions and 0 deletions
309
src/simulator/crates/mc-turn/src/formation.rs
Normal file
309
src/simulator/crates/mc-turn/src/formation.rs
Normal file
|
|
@ -0,0 +1,309 @@
|
|||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Slot role for a unit within a formation. Per `HEX_GEOMETRY.md` §11
|
||||
/// formations occupy one centre slot plus a subset of the host hex's
|
||||
/// six edge slots. `slot_assignments` on `Formation` records each unit's
|
||||
/// role.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum FormationSlot {
|
||||
/// The host hex's centre slot. Holds the formation leader.
|
||||
Centre,
|
||||
/// One of the six edge slots, identified by the direction index `0..6`
|
||||
/// matching `mc-core::algorithms::hex::AXIAL_DIRECTIONS`.
|
||||
Edge { dir: u8 },
|
||||
}
|
||||
|
||||
impl FormationSlot {
|
||||
/// True if the unit occupies the centre slot.
|
||||
pub fn is_centre(self) -> bool {
|
||||
matches!(self, FormationSlot::Centre)
|
||||
}
|
||||
/// Returns the edge direction if this slot is an edge slot.
|
||||
pub fn edge_dir(self) -> Option<u8> {
|
||||
match self {
|
||||
FormationSlot::Edge { dir } => Some(dir),
|
||||
FormationSlot::Centre => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Ordered grouping of units that move and fight together.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Formation {
|
||||
pub id: u32,
|
||||
pub owner: u8,
|
||||
/// Stable unit IDs (MapUnit::id) that belong to this formation.
|
||||
pub unit_ids: Vec<u32>,
|
||||
/// The unit that leads this formation (front-most / highest HP).
|
||||
pub leader_id: u32,
|
||||
pub shape: FormationShape,
|
||||
pub command: FormationCommand,
|
||||
/// Hex the formation was told to rally to; None means no active rally.
|
||||
pub rally_origin: Option<(i32, i32)>,
|
||||
/// Per-unit slot role within the formation (centre or edge direction).
|
||||
/// `#[serde(default)]` so existing saves without slot data deserialize
|
||||
/// cleanly — empty map means "slots not yet assigned" and consumers
|
||||
/// fall back to the existing flat-list behaviour.
|
||||
#[serde(default)]
|
||||
pub slot_assignments: HashMap<u32, FormationSlot>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum FormationShape {
|
||||
Line { width: u8 },
|
||||
Column { depth: u8 },
|
||||
Wedge,
|
||||
Diamond,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum FormationCommand {
|
||||
Defend,
|
||||
Patrol { waypoints: Vec<(i32, i32)> },
|
||||
Advance { target_hex: (i32, i32) },
|
||||
}
|
||||
|
||||
/// Request to set or clear a building's rally point. Queued on GameState and
|
||||
/// drained each turn — mirrors the AttackRequest pattern.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct RallyPointRequest {
|
||||
pub player_index: u8,
|
||||
pub city_index: usize,
|
||||
pub building_id: String,
|
||||
/// None = clear the rally point.
|
||||
pub hex: Option<(i32, i32)>,
|
||||
/// Standing order for freshly spawned units ("hold", "defend", "fortify",
|
||||
/// "join_formation", "patrol", "advance").
|
||||
pub command: String,
|
||||
/// Second waypoint for Patrol command. -1/-1 = not set (non-Patrol commands).
|
||||
#[serde(default = "default_minus_one")]
|
||||
pub waypoint_2_col: i32,
|
||||
#[serde(default = "default_minus_one")]
|
||||
pub waypoint_2_row: i32,
|
||||
}
|
||||
|
||||
/// Serde default for the optional second-waypoint coordinates: `-1` marks
/// the coordinate as "not set".
fn default_minus_one() -> i32 {
    const NOT_SET: i32 = -1;
    NOT_SET
}
|
||||
|
||||
/// Request to issue a command to a formation. Queued on GameState.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct FormationCommandRequest {
|
||||
pub player_index: u8,
|
||||
pub formation_id: u32,
|
||||
pub destination: (i32, i32),
|
||||
/// "Defend", "Advance", "Patrol"
|
||||
pub command: String,
|
||||
}
|
||||
|
||||
/// Request to change a formation's tactical shape. Queued on GameState.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct FormationShapeRequest {
|
||||
pub player_index: u8,
|
||||
pub formation_id: u32,
|
||||
/// "line", "column", "wedge", "diamond"
|
||||
pub shape: String,
|
||||
}
|
||||
|
||||
/// Request to detach a single unit from its formation. Queued on GameState.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SplitFormationRequest {
|
||||
pub player_index: u8,
|
||||
pub unit_id: u32,
|
||||
}
|
||||
|
||||
/// Request to toggle auto-join for a unit. Queued on GameState.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct AutoJoinRequest {
|
||||
pub player_index: u8,
|
||||
pub unit_id: u32,
|
||||
pub enabled: bool,
|
||||
}
|
||||
|
||||
impl Formation {
|
||||
pub fn new(id: u32, owner: u8, leader_id: u32) -> Self {
|
||||
let mut slot_assignments = HashMap::new();
|
||||
// Leader defaults to the centre slot per `HEX_GEOMETRY.md` §11
|
||||
// ("the leader sits at the centre, always") — call sites can
|
||||
// override via `assign_slot` if needed.
|
||||
slot_assignments.insert(leader_id, FormationSlot::Centre);
|
||||
Self {
|
||||
id,
|
||||
owner,
|
||||
unit_ids: vec![leader_id],
|
||||
leader_id,
|
||||
shape: FormationShape::Line { width: 1 },
|
||||
command: FormationCommand::Defend,
|
||||
rally_origin: None,
|
||||
slot_assignments,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn size(&self) -> usize {
|
||||
self.unit_ids.len()
|
||||
}
|
||||
|
||||
/// Assign a unit to a slot. Replaces any prior assignment for that unit.
|
||||
/// Does **not** add the unit to `unit_ids` — callers manage membership
|
||||
/// separately so this is an idempotent slot rebind.
|
||||
pub fn assign_slot(&mut self, unit_id: u32, slot: FormationSlot) {
|
||||
self.slot_assignments.insert(unit_id, slot);
|
||||
}
|
||||
|
||||
/// The unit currently in the centre slot, if any. Defaults to the
|
||||
/// leader for legacy formations without slot data.
|
||||
pub fn centre_unit(&self) -> Option<u32> {
|
||||
if self.slot_assignments.is_empty() {
|
||||
return Some(self.leader_id);
|
||||
}
|
||||
self.slot_assignments
|
||||
.iter()
|
||||
.find(|(_, slot)| slot.is_centre())
|
||||
.map(|(id, _)| *id)
|
||||
}
|
||||
|
||||
/// The unit on the given edge direction, if any.
|
||||
pub fn edge_unit(&self, dir: u8) -> Option<u32> {
|
||||
self.slot_assignments
|
||||
.iter()
|
||||
.find(|(_, slot)| slot.edge_dir() == Some(dir))
|
||||
.map(|(id, _)| *id)
|
||||
}
|
||||
|
||||
/// All edge directions currently occupied, in ascending order.
|
||||
pub fn occupied_edges(&self) -> Vec<u8> {
|
||||
let mut dirs: Vec<u8> = self
|
||||
.slot_assignments
|
||||
.values()
|
||||
.filter_map(|s| s.edge_dir())
|
||||
.collect();
|
||||
dirs.sort_unstable();
|
||||
dirs.dedup();
|
||||
dirs
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn new_formation_assigns_leader_to_centre() {
        let f = Formation::new(1, 0, 99);
        assert_eq!(f.centre_unit(), Some(99));
        assert!(f.occupied_edges().is_empty());
    }

    #[test]
    fn assign_edge_then_query() {
        let mut f = Formation::new(1, 0, 99);
        f.assign_slot(101, FormationSlot::Edge { dir: 0 });
        f.assign_slot(102, FormationSlot::Edge { dir: 3 });
        assert_eq!(f.edge_unit(0), Some(101));
        assert_eq!(f.edge_unit(3), Some(102));
        assert_eq!(f.edge_unit(5), None);
        assert_eq!(f.occupied_edges(), vec![0, 3]);
    }

    #[test]
    fn assign_slot_is_idempotent_rebind() {
        let mut f = Formation::new(1, 0, 99);
        f.assign_slot(101, FormationSlot::Edge { dir: 0 });
        f.assign_slot(101, FormationSlot::Edge { dir: 5 });
        assert_eq!(f.edge_unit(0), None, "old slot must be released");
        assert_eq!(f.edge_unit(5), Some(101), "new slot must hold the unit");
    }

    #[test]
    fn assign_slot_does_not_change_membership() {
        // `assign_slot` only rebinds slots; membership (`unit_ids`) is
        // managed separately by callers.
        let mut f = Formation::new(1, 0, 99);
        f.assign_slot(101, FormationSlot::Edge { dir: 2 });
        assert_eq!(f.size(), 1, "assign_slot must not add members");
        assert_eq!(f.unit_ids, vec![99]);
        assert_eq!(f.edge_unit(2), Some(101));
    }

    #[test]
    fn occupied_edges_deduplicates_shared_direction() {
        // Two units mapped to the same edge must yield that direction once.
        let mut f = Formation::new(1, 0, 99);
        f.assign_slot(101, FormationSlot::Edge { dir: 4 });
        f.assign_slot(102, FormationSlot::Edge { dir: 4 });
        f.assign_slot(103, FormationSlot::Edge { dir: 1 });
        assert_eq!(f.occupied_edges(), vec![1, 4]);
    }

    #[test]
    fn legacy_formation_without_slot_data_defaults_centre_to_leader() {
        // Simulate a save loaded with #[serde(default)] empty slot_assignments.
        let f = Formation {
            id: 1,
            owner: 0,
            unit_ids: vec![99],
            leader_id: 99,
            shape: FormationShape::Line { width: 1 },
            command: FormationCommand::Defend,
            rally_origin: None,
            slot_assignments: HashMap::new(),
        };
        assert_eq!(f.centre_unit(), Some(99));
    }

    #[test]
    fn formation_round_trips_through_serde_with_slot_assignments() {
        let mut f = Formation::new(7, 1, 99);
        f.assign_slot(101, FormationSlot::Edge { dir: 0 });
        f.assign_slot(102, FormationSlot::Edge { dir: 3 });

        let json = serde_json::to_string(&f).expect("serialize");
        let parsed: Formation = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(parsed.id, 7);
        assert_eq!(parsed.leader_id, 99);
        assert_eq!(parsed.slot_assignments.len(), 3);
        assert_eq!(parsed.centre_unit(), Some(99));
        assert_eq!(parsed.edge_unit(0), Some(101));
        assert_eq!(parsed.edge_unit(3), Some(102));
    }

    #[test]
    fn formation_slot_centre_serializes_with_stable_json_shape() {
        // The JSON shape is consumed by GDExtension on the Godot side —
        // changing the serde attributes (tag name, rename_all) here
        // would silently break those consumers. This test locks the wire
        // format.
        let json = serde_json::to_string(&FormationSlot::Centre).expect("serialize");
        assert_eq!(json, r#"{"type":"centre"}"#);

        let parsed: FormationSlot = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(parsed, FormationSlot::Centre);
    }

    #[test]
    fn formation_slot_edge_serializes_with_stable_json_shape() {
        // Struct-variant form: `{"type":"edge","dir":N}`.
        let slot = FormationSlot::Edge { dir: 5 };
        let json = serde_json::to_string(&slot).expect("serialize");
        assert_eq!(json, r#"{"type":"edge","dir":5}"#);

        let parsed: FormationSlot = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(parsed, slot);
    }

    #[test]
    fn legacy_formation_json_without_slot_assignments_deserializes_via_serde_default() {
        // Save written before Formation::slot_assignments existed —
        // missing the field entirely. `#[serde(default)]` must let it
        // deserialize as an empty map; `centre_unit()` falls back to
        // `leader_id` when the map is empty.
        let legacy_json = r#"{
            "id": 5,
            "owner": 0,
            "unit_ids": [42],
            "leader_id": 42,
            "shape": {"type": "line", "width": 1},
            "command": {"type": "defend"},
            "rally_origin": null
        }"#;
        let parsed: Formation = serde_json::from_str(legacy_json)
            .expect("legacy formation JSON without slot_assignments must deserialize");
        assert!(parsed.slot_assignments.is_empty());
        assert_eq!(
            parsed.centre_unit(),
            Some(42),
            "legacy formations must fall back to leader_id for centre_unit()"
        );
    }

    #[test]
    fn formation_slot_helpers() {
        assert!(FormationSlot::Centre.is_centre());
        assert!(!FormationSlot::Edge { dir: 2 }.is_centre());
        assert_eq!(FormationSlot::Centre.edge_dir(), None);
        assert_eq!(FormationSlot::Edge { dir: 2 }.edge_dir(), Some(2));
    }
}
|
||||
390
src/simulator/crates/mc-turn/src/policy.rs
Normal file
390
src/simulator/crates/mc-turn/src/policy.rs
Normal file
|
|
@ -0,0 +1,390 @@
|
|||
//! Task B3 prep — clan-aware rollout policy priors.
|
||||
//!
|
||||
//! Pure, isolated API. No dependency on `mcts_tree` or a concrete rollout
|
||||
//! state yet — those wire in once Task #2 lands the real CPU rollout. For now
|
||||
//! this module owns:
|
||||
//! - `ActionKind` — the coarse action taxonomy the rollout policy picks from
|
||||
//! - `PersonalityPriors` — the six raw axes (1..=10) carried per-player
|
||||
//! - `PersonalityPriors::action_prior(kind) -> f32` — raw bias score
|
||||
//! - `PersonalityPriors::action_distribution(&[ActionKind], temperature) -> Vec<f32>` —
|
||||
//! temperature-softmaxed distribution over a candidate set
|
||||
//!
|
||||
//! The divergence test (Ironhold biases `Build` > 0.4, Blackhammer biases
|
||||
//! `Attack` > 0.4) lives in `tests/clan_policy_priors.rs` and runs today.
|
||||
//! When Task #2 is green, `mcts_tree::TreeState::prior()` will call
|
||||
//! `PersonalityPriors::action_prior` on the action derived from each child.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
||||
use crate::evaluator::{LoadError, PersonalityDef};
|
||||
|
||||
/// Coarse action taxonomy the MCTS rollout policy samples from. Concrete
|
||||
/// candidates (unit ids, building ids, tech ids, tile targets) are grouped
|
||||
/// into these buckets so a single personality prior can bias the whole rollout
|
||||
/// without needing per-id knobs.
|
||||
///
|
||||
/// # Rollout vs. strategic variants
|
||||
///
|
||||
/// `ActionKind::ALL` contains only the **9 rollout-legal kinds** that the WGSL
|
||||
/// shader and `GameRolloutState::active_actions` enumerate. The discriminants
|
||||
/// 0–8 are load-bearing: the WGSL `switch` in `action_prior` and
|
||||
/// `apply_active` index directly into them — do NOT reorder or insert into
|
||||
/// `ALL` without updating `rollout.wgsl` in lockstep.
|
||||
///
|
||||
/// `CommandFormation` and `SetRallyPoint` are **strategic-planning variants**
|
||||
/// used by the MCTS candidate generator (`build_formation_candidates`) and
|
||||
/// scored by `score_action`. They are never emitted by `active_actions()` and
|
||||
/// therefore never reach the rollout shader.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
|
||||
pub enum ActionKind {
|
||||
/// Queue a production building or non-military improvement.
|
||||
Build,
|
||||
/// Queue a military unit or start an offensive move.
|
||||
Attack,
|
||||
/// Found a new city.
|
||||
Settle,
|
||||
/// Start or continue tech research.
|
||||
Research,
|
||||
/// Queue walls / defensive units / garrison, or fortify in place.
|
||||
Defend,
|
||||
/// Gold-side action: market, trade route, rush-buy.
|
||||
Trade,
|
||||
/// Continue an existing war (ignore peace offer) when grudge is high.
|
||||
ContinueWar,
|
||||
/// Accept a peace offer or decline to re-engage.
|
||||
MakePeace,
|
||||
/// No-op / skip turn.
|
||||
Idle,
|
||||
/// Issue a movement/combat command to an existing formation (p0-43).
|
||||
/// The candidate `choice_id` encodes the target as
|
||||
/// `"cmd_formation:{formation_id}:{command}:{hex_q},{hex_r}"`.
|
||||
/// Not part of the GPU rollout — strategic planning only.
|
||||
CommandFormation,
|
||||
/// Set a rally point on a barracks or military building (p0-43).
|
||||
/// The candidate `choice_id` encodes the target as
|
||||
/// `"set_rally:{city_id}:{building_id}:{hex_q},{hex_r}:{command}"`.
|
||||
/// Not part of the GPU rollout — strategic planning only.
|
||||
SetRallyPoint,
|
||||
}
|
||||
|
||||
impl ActionKind {
|
||||
/// The 9 rollout-legal action kinds. Order is load-bearing — WGSL
|
||||
/// `action_prior` / `apply_active` switch on discriminant 0..=8 mapped
|
||||
/// to this order. Never extend without updating `rollout.wgsl`.
|
||||
pub const ALL: [ActionKind; 9] = [
|
||||
ActionKind::Build,
|
||||
ActionKind::Attack,
|
||||
ActionKind::Settle,
|
||||
ActionKind::Research,
|
||||
ActionKind::Defend,
|
||||
ActionKind::Trade,
|
||||
ActionKind::ContinueWar,
|
||||
ActionKind::MakePeace,
|
||||
ActionKind::Idle,
|
||||
];
|
||||
|
||||
/// Best-effort classifier from the loose string tags used by
|
||||
/// `mcts::Candidate::choice_type` today (`"unit"`, `"building"`, `"item"`,
|
||||
/// `"tech"`). Callers that know more context should pass `ActionKind`
|
||||
/// directly instead of round-tripping through this classifier.
|
||||
pub fn from_choice_type(choice_type: &str, combat_type: &str) -> Self {
|
||||
match choice_type {
|
||||
"building" => ActionKind::Build,
|
||||
"unit" => match combat_type {
|
||||
"civilian" | "specialist" => ActionKind::Build,
|
||||
_ => ActionKind::Attack,
|
||||
},
|
||||
"item" => ActionKind::Build,
|
||||
"tech" => ActionKind::Research,
|
||||
"found_city" | "settle" => ActionKind::Settle,
|
||||
"command_formation" => ActionKind::CommandFormation,
|
||||
"set_rally" => ActionKind::SetRallyPoint,
|
||||
_ => ActionKind::Idle,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Raw six-axis personality payload, kept on the JSON 1..=10 scale (5 = neutral).
|
||||
/// This is the rollout-policy's source of truth — `StrategicWeights` is a
|
||||
/// lossier five-knob projection used by the state evaluator.
|
||||
#[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize)]
|
||||
pub struct PersonalityPriors {
|
||||
pub aggression: f32,
|
||||
pub expansion: f32,
|
||||
pub production: f32,
|
||||
pub wealth: f32,
|
||||
pub trade_willingness: f32,
|
||||
pub grudge_persistence: f32,
|
||||
}
|
||||
|
||||
impl Default for PersonalityPriors {
|
||||
/// Neutral personality — every axis at 5 (= 0 after centering).
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
aggression: 5.0,
|
||||
expansion: 5.0,
|
||||
production: 5.0,
|
||||
wealth: 5.0,
|
||||
trade_willingness: 5.0,
|
||||
grudge_persistence: 5.0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PersonalityPriors {
|
||||
/// Load from `<data_dir>/ai_personalities.json`, picking the named clan.
|
||||
pub fn from_personality(id: &str, data_dir: &Path) -> Result<Self, LoadError> {
|
||||
let path = data_dir.join("ai_personalities.json");
|
||||
let json = std::fs::read_to_string(&path).map_err(|source| LoadError::Io {
|
||||
path: path.clone(),
|
||||
source,
|
||||
})?;
|
||||
let personalities: HashMap<String, PersonalityDef> =
|
||||
serde_json::from_str(&json).map_err(|source| LoadError::Parse {
|
||||
path: path.clone(),
|
||||
source,
|
||||
})?;
|
||||
let p = personalities
|
||||
.get(id)
|
||||
.ok_or_else(|| LoadError::UnknownClan(id.to_string()))?;
|
||||
Ok(Self::from_axes(&p.strategic_axes))
|
||||
}
|
||||
|
||||
/// Construct from a raw 1..=10 axis map. Missing keys default to 5
|
||||
/// (neutral). Out-of-range values clamp to `[1, 10]`.
|
||||
pub fn from_axes(axes: &HashMap<String, i32>) -> Self {
|
||||
let axis = |key: &str| -> f32 {
|
||||
let raw = *axes.get(key).unwrap_or(&5);
|
||||
raw.clamp(1, 10) as f32
|
||||
};
|
||||
Self {
|
||||
aggression: axis("aggression"),
|
||||
expansion: axis("expansion"),
|
||||
production: axis("production"),
|
||||
wealth: axis("wealth"),
|
||||
trade_willingness: axis("trade_willingness"),
|
||||
grudge_persistence: axis("grudge_persistence"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the centered delta for an axis: `axis - 5` clamped to `[-4, +5]`.
|
||||
/// Positive = push, negative = pull. Used as the coefficient on per-kind
|
||||
/// prior contributions below.
|
||||
fn delta(&self, axis: f32) -> f32 {
|
||||
(axis - 5.0).clamp(-4.0, 5.0)
|
||||
}
|
||||
|
||||
/// Raw bias score for a single action kind. Range is roughly `[-2, +3]`
|
||||
/// for realistic personalities (baseline = 0). The policy caller softmaxes
|
||||
/// over a candidate set to produce a probability distribution.
|
||||
///
|
||||
/// Mapping rationale (each axis contributes ~0.2 per point of delta):
|
||||
/// - Build: + production, + expansion (settlers are build-adjacent)
|
||||
/// - Attack: + aggression, − grudge (wait, grudge pushes war continuation,
|
||||
/// not initial attacks — so keep Attack driven by aggression alone)
|
||||
/// - Settle: + expansion
|
||||
/// - Research: + wealth × 0.5 (scholarly clans fund research via gold)
|
||||
/// - Defend: − aggression, + production × 0.5
|
||||
/// - Trade: + trade_willingness, + wealth × 0.5
|
||||
/// - ContinueWar: + grudge_persistence, + aggression × 0.5
|
||||
/// - MakePeace: − grudge_persistence, − aggression × 0.5
|
||||
/// - Idle: always 0 (baseline)
|
||||
pub fn action_prior(&self, kind: ActionKind) -> f32 {
|
||||
let prod = self.delta(self.production);
|
||||
let agg = self.delta(self.aggression);
|
||||
let exp = self.delta(self.expansion);
|
||||
let wealth = self.delta(self.wealth);
|
||||
let trade = self.delta(self.trade_willingness);
|
||||
let grudge = self.delta(self.grudge_persistence);
|
||||
|
||||
match kind {
|
||||
ActionKind::Build => 0.20 * prod + 0.08 * exp,
|
||||
// Attack's coefficient is higher than Build's per-axis weight so
|
||||
// that high-aggression clans (Blackhammer: aggression=9) clear the
|
||||
// B3 bullet of <30% Build rollout mass on a 2B/2A/1S slate, while
|
||||
// low-aggression clans (Ironhold: aggression=6) still land in the
|
||||
// Attack-suppressed regime their production axis wants.
|
||||
ActionKind::Attack => 0.30 * agg,
|
||||
ActionKind::Settle => 0.22 * exp,
|
||||
ActionKind::Research => 0.12 * wealth + 0.05 * prod,
|
||||
ActionKind::Defend => -0.15 * agg + 0.10 * prod,
|
||||
ActionKind::Trade => 0.18 * trade + 0.10 * wealth,
|
||||
ActionKind::ContinueWar => 0.20 * grudge + 0.10 * agg,
|
||||
ActionKind::MakePeace => -0.20 * grudge - 0.10 * agg,
|
||||
ActionKind::Idle => 0.0,
|
||||
// Strategic-planning variants (p0-43). Not part of the GPU rollout;
|
||||
// priors are used only when these candidates appear in MCTS selection.
|
||||
// CommandFormation scores with aggression (advancing troops is offensive).
|
||||
ActionKind::CommandFormation => 0.25 * agg,
|
||||
// SetRallyPoint is a mild production-axis action (building infrastructure).
|
||||
// TODO(p2-53c): AI rally-command policy — choose Hold/Defend/Fortify/JoinFormation/Patrol/Advance
|
||||
// based on city threat level, frontier proximity, and strategic axis.
|
||||
// Default for now: all SetRallyPoint uses the same flat prior (Defend behaviour at runtime).
|
||||
ActionKind::SetRallyPoint => 0.10 * prod,
|
||||
}
|
||||
}
|
||||
|
||||
/// Softmax probability distribution over a candidate set of action kinds.
|
||||
/// `temperature` shapes how sharply the distribution peaks around the
|
||||
/// highest-prior kind; `1.0` is a reasonable default. Lower = sharper.
|
||||
///
|
||||
/// The returned vector has the same length and order as `kinds` and sums
|
||||
/// to `1.0` (within float precision). Duplicate kinds are allowed — each
|
||||
/// occurrence gets its own slot so callers can feed in one entry per legal
|
||||
/// candidate action without de-duplicating.
|
||||
pub fn action_distribution(&self, kinds: &[ActionKind], temperature: f32) -> Vec<f32> {
|
||||
if kinds.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
let t = temperature.max(0.05);
|
||||
let priors: Vec<f32> = kinds.iter().map(|&k| self.action_prior(k) / t).collect();
|
||||
let max = priors.iter().copied().fold(f32::NEG_INFINITY, f32::max);
|
||||
let exps: Vec<f32> = priors.iter().map(|p| (p - max).exp()).collect();
|
||||
let sum: f32 = exps.iter().sum();
|
||||
if sum <= 0.0 {
|
||||
return vec![1.0 / kinds.len() as f32; kinds.len()];
|
||||
}
|
||||
exps.into_iter().map(|e| e / sum).collect()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    fn ironhold() -> PersonalityPriors {
        PersonalityPriors {
            aggression: 6.0,
            expansion: 4.0,
            production: 9.0,
            wealth: 3.0,
            trade_willingness: 3.0,
            grudge_persistence: 7.0,
        }
    }

    fn blackhammer() -> PersonalityPriors {
        PersonalityPriors {
            aggression: 9.0,
            expansion: 6.0,
            production: 7.0,
            wealth: 2.0,
            trade_willingness: 2.0,
            grudge_persistence: 9.0,
        }
    }

    fn goldvein() -> PersonalityPriors {
        PersonalityPriors {
            aggression: 3.0,
            expansion: 5.0,
            production: 5.0,
            wealth: 9.0,
            trade_willingness: 9.0,
            grudge_persistence: 4.0,
        }
    }

    #[test]
    fn neutral_priors_are_zero_across_kinds() {
        let neutral = PersonalityPriors::default();
        for k in ActionKind::ALL {
            let p = neutral.action_prior(k);
            assert!(p.abs() < 1e-5, "neutral prior for {k:?} was {p}, expected 0");
        }
    }

    #[test]
    fn neutral_priors_are_zero_for_strategic_kinds() {
        // `ActionKind::ALL` only covers the rollout-legal kinds, so the
        // strategic-planning variants need their own neutrality check.
        let neutral = PersonalityPriors::default();
        for k in [ActionKind::CommandFormation, ActionKind::SetRallyPoint] {
            let p = neutral.action_prior(k);
            assert!(p.abs() < 1e-5, "neutral prior for {k:?} was {p}, expected 0");
        }
    }

    #[test]
    fn from_choice_type_classifies_building_as_build() {
        assert_eq!(ActionKind::from_choice_type("building", ""), ActionKind::Build);
        assert_eq!(ActionKind::from_choice_type("unit", "melee"), ActionKind::Attack);
        assert_eq!(ActionKind::from_choice_type("unit", "civilian"), ActionKind::Build);
        assert_eq!(ActionKind::from_choice_type("tech", ""), ActionKind::Research);
    }

    #[test]
    fn from_choice_type_covers_remaining_tags() {
        assert_eq!(ActionKind::from_choice_type("unit", "specialist"), ActionKind::Build);
        assert_eq!(ActionKind::from_choice_type("item", ""), ActionKind::Build);
        assert_eq!(ActionKind::from_choice_type("found_city", ""), ActionKind::Settle);
        assert_eq!(ActionKind::from_choice_type("settle", ""), ActionKind::Settle);
        assert_eq!(
            ActionKind::from_choice_type("command_formation", ""),
            ActionKind::CommandFormation
        );
        assert_eq!(ActionKind::from_choice_type("set_rally", ""), ActionKind::SetRallyPoint);
        assert_eq!(ActionKind::from_choice_type("mystery", ""), ActionKind::Idle);
    }

    #[test]
    fn distribution_sums_to_one_and_preserves_order() {
        let kinds = [ActionKind::Build, ActionKind::Attack, ActionKind::Settle];
        let dist = ironhold().action_distribution(&kinds, 1.0);
        assert_eq!(dist.len(), kinds.len());
        let sum: f32 = dist.iter().sum();
        assert!((sum - 1.0).abs() < 1e-4, "distribution must sum to 1, got {sum}");
        for p in &dist {
            assert!(*p > 0.0, "softmax must produce strictly positive probs");
        }
    }

    #[test]
    fn empty_kinds_produces_empty_distribution() {
        assert!(ironhold().action_distribution(&[], 1.0).is_empty());
    }

    #[test]
    fn duplicate_kinds_get_independent_slots() {
        let kinds = [ActionKind::Build, ActionKind::Build, ActionKind::Attack];
        let dist = ironhold().action_distribution(&kinds, 1.0);
        // Two Build slots must be equal (same kind, same prior input).
        assert!((dist[0] - dist[1]).abs() < 1e-5);
        // Combined Build mass must exceed Attack mass for Ironhold.
        assert!(dist[0] + dist[1] > dist[2]);
    }

    #[test]
    fn ironhold_biases_build_over_attack() {
        let iron = ironhold();
        let build = iron.action_prior(ActionKind::Build);
        let attack = iron.action_prior(ActionKind::Attack);
        assert!(
            build > attack,
            "ironhold must prefer Build over Attack: build={build} attack={attack}"
        );
    }

    #[test]
    fn blackhammer_biases_attack_over_build() {
        let bh = blackhammer();
        let build = bh.action_prior(ActionKind::Build);
        let attack = bh.action_prior(ActionKind::Attack);
        assert!(
            attack > build,
            "blackhammer must prefer Attack over Build: attack={attack} build={build}"
        );
    }

    #[test]
    fn goldvein_biases_trade_over_attack() {
        let gv = goldvein();
        let trade = gv.action_prior(ActionKind::Trade);
        let attack = gv.action_prior(ActionKind::Attack);
        assert!(
            trade > attack,
            "goldvein must prefer Trade over Attack: trade={trade} attack={attack}"
        );
    }

    #[test]
    fn blackhammer_prefers_continue_war_over_make_peace() {
        let bh = blackhammer();
        let cont = bh.action_prior(ActionKind::ContinueWar);
        let peace = bh.action_prior(ActionKind::MakePeace);
        assert!(
            cont > peace,
            "high-grudge blackhammer must prefer ContinueWar: cont={cont} peace={peace}"
        );
    }

    #[test]
    fn temperature_sharpens_distribution() {
        let kinds = [ActionKind::Build, ActionKind::Attack];
        let iron = ironhold();
        let soft = iron.action_distribution(&kinds, 2.0);
        let sharp = iron.action_distribution(&kinds, 0.5);
        // Ironhold prefers Build, so low temperature must concentrate more
        // probability on the Build slot.
        assert!(
            sharp[0] > soft[0],
            "lower temperature must sharpen toward the preferred action: sharp={sharp:?} soft={soft:?}"
        );
    }
}
|
||||
Loading…
Add table
Reference in a new issue