feat(@projects/@magic-civilization): ✨ update fauna encounter rng to player-threaded mode
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
0e8cec35ce
commit
673dd56cd8
4 changed files with 100 additions and 68 deletions
2
src/simulator/Cargo.lock
generated
2
src/simulator/Cargo.lock
generated
|
|
@ -1029,6 +1029,8 @@ dependencies = [
|
|||
"mc-city",
|
||||
"mc-combat",
|
||||
"mc-core",
|
||||
"mc-economy",
|
||||
"mc-happiness",
|
||||
"pollster 0.3.0",
|
||||
"proptest",
|
||||
"serde",
|
||||
|
|
|
|||
|
|
@ -3,11 +3,15 @@
|
|||
// Ports the inner RNG rolls of process_fauna_encounters_inner to GPU.
|
||||
// WGSL has no u64, so SplitMix64 is implemented via 32-bit schoolbook multiply.
|
||||
//
|
||||
// Input contract: units are sorted by tile_idx ascending within each player
|
||||
// (enforced by the Rust host) so RNG stream is byte-identical to the CPU path.
|
||||
// RNG determinism: the CPU path uses a single sequential per-player RNG stream —
|
||||
// each unit consumes state left by the previous unit in sorted tile order. To
|
||||
// match byte-for-byte we run all units for ONE player in a SINGLE thread, looping
|
||||
// sequentially. One workgroup dispatch = one player. Parallelism across players
|
||||
// is achieved by the host issuing one dispatch per player (separate command
|
||||
// encoder submissions).
|
||||
//
|
||||
// Bindings (group 0):
|
||||
// 0 unit_tiles array<u32> flat tile = col*H + row, per unit
|
||||
// 0 unit_tiles array<u32> flat tile = row*W + col (row-major, matches LairIndexCsr)
|
||||
// 1 unit_meta array<u32> bit0=fortified, bits1..7=player_idx
|
||||
// 2 player_rng array<u32> 2 u32 per player (lo,hi of SplitMix64 state), read_write
|
||||
// 3 csr_offsets array<u32> CSR offsets, len = W*H + 1
|
||||
|
|
@ -25,7 +29,7 @@ struct Uniforms {
|
|||
tier_kill_exponent: f32,
|
||||
fortify_divisor: f32,
|
||||
encounter_probability: f32,
|
||||
_pad: u32,
|
||||
player_idx: u32,
|
||||
}
|
||||
|
||||
@group(0) @binding(0) var<storage, read> unit_tiles: array<u32>;
|
||||
|
|
@ -106,42 +110,45 @@ fn kill_probability(tier: u32, fortified: bool, u: Uniforms) -> f32 {
|
|||
return raw;
|
||||
}
|
||||
|
||||
@compute @workgroup_size(64, 1, 1)
|
||||
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
|
||||
let uid = gid.x;
|
||||
// Single-threaded sequential loop over all units for one player.
|
||||
// Dispatched as (1, 1, 1) — no intra-workgroup parallelism on RNG state.
|
||||
@compute @workgroup_size(1, 1, 1)
|
||||
fn main() {
|
||||
let uni = uniforms[0];
|
||||
if uid >= uni.n_units { return; }
|
||||
|
||||
let tile = unit_tiles[uid];
|
||||
let um = unit_meta[uid];
|
||||
let pi = (um >> 1u) & 0x7Fu;
|
||||
let fort = (um & 1u) != 0u;
|
||||
let pi = uni.player_idx;
|
||||
|
||||
var rlo = player_rng[pi * 2u];
|
||||
var rhi = player_rng[pi * 2u + 1u];
|
||||
|
||||
if tile >= uni.grid_width * uni.grid_height {
|
||||
kill_flags[uid] = 0u;
|
||||
return;
|
||||
}
|
||||
for (var uid = 0u; uid < uni.n_units; uid++) {
|
||||
let tile = unit_tiles[uid];
|
||||
let um = unit_meta[uid];
|
||||
let fort = (um & 1u) != 0u;
|
||||
|
||||
let start = csr_offsets[tile];
|
||||
let end = csr_offsets[tile + 1u];
|
||||
var killed = false;
|
||||
if tile >= uni.grid_width * uni.grid_height {
|
||||
kill_flags[uid] = 0u;
|
||||
continue;
|
||||
}
|
||||
|
||||
for (var p = start; p < end; p++) {
|
||||
let tier = lair_tiers[csr_lairs[p]];
|
||||
let start = csr_offsets[tile];
|
||||
let end = csr_offsets[tile + 1u];
|
||||
var killed = false;
|
||||
|
||||
// Encounter gate roll
|
||||
let s1 = smix_step(rlo, rhi); rlo = s1.x; rhi = s1.y;
|
||||
if rand_f32(rhi) > uni.encounter_probability { continue; }
|
||||
for (var p = start; p < end; p++) {
|
||||
let tier = lair_tiers[csr_lairs[p]];
|
||||
|
||||
// Kill roll
|
||||
let s2 = smix_step(rlo, rhi); rlo = s2.x; rhi = s2.y;
|
||||
if rand_f32(rhi) <= kill_probability(tier, fort, uni) { killed = true; }
|
||||
// Encounter gate roll
|
||||
let s1 = smix_step(rlo, rhi); rlo = s1.x; rhi = s1.y;
|
||||
if rand_f32(rhi) > uni.encounter_probability { continue; }
|
||||
|
||||
// Kill roll
|
||||
let s2 = smix_step(rlo, rhi); rlo = s2.x; rhi = s2.y;
|
||||
if rand_f32(rhi) <= kill_probability(tier, fort, uni) { killed = true; }
|
||||
}
|
||||
|
||||
kill_flags[uid] = select(0u, 1u, killed);
|
||||
}
|
||||
|
||||
player_rng[pi * 2u] = rlo;
|
||||
player_rng[pi * 2u + 1u] = rhi;
|
||||
kill_flags[uid] = select(0u, 1u, killed);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ mod inner {
|
|||
/// Minimal per-unit data needed by the shader.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct GpuUnit {
|
||||
/// Flat tile index: col * grid_height + row
|
||||
/// Flat tile index: row * grid_width + col (row-major, matches LairIndexCsr)
|
||||
pub tile_idx: u32,
|
||||
/// bit0 = is_fortified, bits 1..7 = player_index (7 bits: indices 0..=127, i.e. up to 128 players)
|
||||
pub meta: u32,
|
||||
|
|
@ -44,7 +44,7 @@ mod inner {
|
|||
tier_kill_exponent: f32,
|
||||
fortify_divisor: f32,
|
||||
encounter_probability: f32,
|
||||
_pad0: u32,
|
||||
player_idx: u32,
|
||||
}
|
||||
|
||||
/// Lazy-initialized wgpu context. Created once, reused across dispatches.
|
||||
|
|
@ -167,7 +167,7 @@ mod inner {
|
|||
tier_kill_exponent: cfg.tier_kill_exponent,
|
||||
fortify_divisor: cfg.fortify_divisor,
|
||||
encounter_probability: cfg.encounter_probability_per_turn,
|
||||
_pad0: 0,
|
||||
player_idx: player_idx as u32,
|
||||
};
|
||||
|
||||
// Upload buffers
|
||||
|
|
@ -195,7 +195,6 @@ mod inner {
|
|||
],
|
||||
});
|
||||
|
||||
let workgroups = (n + 63) / 64;
|
||||
let mut encoder = dev.create_command_encoder(&wgpu::CommandEncoderDescriptor {
|
||||
label: Some("fauna_enc"),
|
||||
});
|
||||
|
|
@ -206,7 +205,7 @@ mod inner {
|
|||
});
|
||||
pass.set_pipeline(&self.pipeline);
|
||||
pass.set_bind_group(0, &bind_group, &[]);
|
||||
pass.dispatch_workgroups(workgroups, 1, 1);
|
||||
pass.dispatch_workgroups(1, 1, 1);
|
||||
}
|
||||
|
||||
// Readback: copy kill_flags to a MAP_READ staging buffer.
|
||||
|
|
@ -436,7 +435,7 @@ mod inner {
|
|||
// Sort units by tile_idx ascending (required for RNG determinism)
|
||||
let mut gpu_units: Vec<GpuUnit> = gpu_state.players[pi].units.iter()
|
||||
.map(|u| {
|
||||
let tile = u.col * H + u.row;
|
||||
let tile = u.row * W + u.col;
|
||||
let meta = (pi as u32) << 1 | (u.is_fortified as u32);
|
||||
GpuUnit { tile_idx: tile as u32, meta }
|
||||
})
|
||||
|
|
@ -462,8 +461,12 @@ mod inner {
|
|||
"GPU kill_flags must be byte-identical to CPU across {TURNS} turns with seed={SEED}");
|
||||
}
|
||||
|
||||
/// Scalar parity: smix_step(lo, hi) == hash_mix(state, SALT) for N iterations.
|
||||
/// Catches any constant or bit-shift mismatch between CPU and WGSL.
|
||||
/// Scalar parity: one GPU smix_step must equal one CPU hash_mix(state, SALT) call.
|
||||
///
|
||||
/// Uses encounter_probability=1.0 and base_kill_rate=0.0 so each dispatch
|
||||
/// executes exactly 2 smix_steps (encounter gate + kill roll, kill never fires).
|
||||
/// CPU mirrors the same 2 rand_unit calls. 16 iterations catch any constant
|
||||
/// or bit-shift divergence early.
|
||||
#[test]
|
||||
fn smix_step_matches_cpu_hash_mix() {
|
||||
let ctx = match GpuContext::try_init() {
|
||||
|
|
@ -474,31 +477,33 @@ mod inner {
|
|||
}
|
||||
};
|
||||
|
||||
// Drive 16 single-unit dispatches with no lairs so the only RNG output
|
||||
// is the unchanged rng_state readback. We verify state evolves identically.
|
||||
let cfg = LairCombatConfig::default();
|
||||
let empty_csr = LairIndexCsr { offsets: vec![0u32; 2], flat_lair_ids: vec![] };
|
||||
let lair_tiers: Vec<u32> = vec![];
|
||||
// encounter_probability=1.0 → gate always passes (2 steps per lair)
|
||||
// base_kill_rate=0.0 → kill never fires, unit survives
|
||||
let cfg = LairCombatConfig {
|
||||
encounter_probability_per_turn: 1.0,
|
||||
base_kill_rate: 0.0,
|
||||
..LairCombatConfig::default()
|
||||
};
|
||||
// One lair on tile 0 (grid 2×1 so tile 0 exists)
|
||||
let one_lair_csr = LairIndexCsr {
|
||||
offsets: vec![0u32, 1u32, 1u32],
|
||||
flat_lair_ids: vec![0u32],
|
||||
};
|
||||
let one_tier: Vec<u32> = vec![1u32];
|
||||
let unit = GpuUnit { tile_idx: 0, meta: 0 };
|
||||
|
||||
let mut cpu_state: u64 = 0xDEAD_C0DE_1234_5678;
|
||||
let mut gpu_state: u64 = cpu_state;
|
||||
|
||||
for _ in 0..16 {
|
||||
// CPU: one rand_unit call = one hash_mix step
|
||||
let (_, next) = rand_unit(cpu_state);
|
||||
cpu_state = next;
|
||||
// CPU: 2 rand_unit steps (encounter gate + kill roll)
|
||||
let (_, s1) = rand_unit(cpu_state);
|
||||
let (_, s2) = rand_unit(s1);
|
||||
cpu_state = s2;
|
||||
|
||||
// GPU: dispatch a single unit on a tile with no lairs.
|
||||
// The kernel still calls smix_step once for the (skipped) encounter roll.
|
||||
let unit = GpuUnit { tile_idx: 0, meta: 0 };
|
||||
// Patch csr so tile 0 has one lair entry (forcing one encounter roll).
|
||||
let one_lair_csr = LairIndexCsr {
|
||||
offsets: vec![0u32, 1u32, 1u32],
|
||||
flat_lair_ids: vec![0u32],
|
||||
};
|
||||
let one_tier: Vec<u32> = vec![1u32];
|
||||
// GPU: dispatch one unit — kernel runs 2 smix_steps for the same lair
|
||||
ctx.dispatch_player_fauna(
|
||||
&[unit],
|
||||
&[unit.clone()],
|
||||
0,
|
||||
&mut gpu_state,
|
||||
&one_lair_csr,
|
||||
|
|
@ -509,9 +514,8 @@ mod inner {
|
|||
);
|
||||
}
|
||||
|
||||
// After 16 identical steps the states must agree.
|
||||
assert_eq!(cpu_state, gpu_state,
|
||||
"smix_step must produce byte-identical output to CPU hash_mix after 16 steps");
|
||||
"smix_step must be byte-identical to CPU hash_mix after 16×2 steps");
|
||||
}
|
||||
|
||||
/// Fallback: when no GPU is available, dispatch returns empty without panicking.
|
||||
|
|
|
|||
|
|
@ -39,16 +39,31 @@ impl VictoryType {
|
|||
|
||||
// ── Score formula ────────────────────────────────────────────────────────────
|
||||
//
|
||||
// Weights (tuneable — start conservative to avoid over-rewarding a single axis):
|
||||
// cities × 50 — settled territory is the backbone of any lead
|
||||
// population × 5 — sustained growth beats quick burst expansion
|
||||
// techs × 10 — research is a multiplier on every other axis
|
||||
// gold / 10 — wealth matters but liquid gold decays quickly
|
||||
// units × 2 — surviving military signals combat power, not kills
|
||||
//
|
||||
// No wonders_built term: PlayerState carries no wonder count; the GDScript
|
||||
// layer can augment the displayed score but the Rust tiebreak runs on the
|
||||
// above fields only.
|
||||
// Tuneable per-axis weights. Rationale on each constant: scoring rewards
|
||||
// durable sources of power (territory, population, research) more than
|
||||
// liquid or ephemeral ones (gold, individual units). No wonders_built term
|
||||
// because PlayerState carries no wonder count; the GDScript layer can
|
||||
// augment the displayed score but the Rust tiebreak runs on these fields
|
||||
// only.
|
||||
|
||||
/// Points per city owned — settled territory is the backbone of any lead.
|
||||
pub const SCORE_WEIGHT_PER_CITY: i64 = 50;
|
||||
|
||||
/// Points per population unit — sustained growth beats quick burst expansion.
|
||||
pub const SCORE_WEIGHT_PER_POP: i64 = 5;
|
||||
|
||||
/// Points per researched tech — research multiplies every other axis.
|
||||
pub const SCORE_WEIGHT_PER_TECH: i64 = 10;
|
||||
|
||||
/// Divisor on treasury gold — wealth matters but liquid gold decays quickly,
|
||||
/// so a flat division keeps a merchant player from running away with score
|
||||
/// on a single well-played gold turn.
|
||||
pub const SCORE_GOLD_DIVISOR: i64 = 10;
|
||||
|
||||
/// Points per surviving military unit — surviving military signals combat
|
||||
/// power. Intentionally low vs cities because a large but idle army isn't
|
||||
/// a lead on its own.
|
||||
pub const SCORE_WEIGHT_PER_UNIT: i64 = 2;
|
||||
|
||||
/// Compute a deterministic score for a single player.
|
||||
/// Used by `check_score_victory` and exposed for testing.
|
||||
|
|
@ -63,7 +78,11 @@ pub fn calculate_score(player: &PlayerState) -> i64 {
|
|||
let unit_count = player.units.len() as i64;
|
||||
let gold = player.gold.max(0) as i64;
|
||||
|
||||
city_count * 50 + pop_total * 5 + tech_count * 10 + gold / 10 + unit_count * 2
|
||||
city_count * SCORE_WEIGHT_PER_CITY
|
||||
+ pop_total * SCORE_WEIGHT_PER_POP
|
||||
+ tech_count * SCORE_WEIGHT_PER_TECH
|
||||
+ gold / SCORE_GOLD_DIVISOR
|
||||
+ unit_count * SCORE_WEIGHT_PER_UNIT
|
||||
}
|
||||
|
||||
/// Tiebreak at max-turns: player with highest score wins.
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue