From 673dd56cd85d6ad3e851da1432aa3f0e94c19dda Mon Sep 17 00:00:00 2001
From: Natalie <natalie@lilithuwu.com>
Date: Thu, 16 Apr 2026 17:34:48 -0700
Subject: [PATCH] =?UTF-8?q?feat(@projects/@magic-civilization):=20?=
 =?UTF-8?q?=E2=9C=A8=20update=20fauna=20encounter=20rng=20to=20player-thre?=
 =?UTF-8?q?aded=20mode?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

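Also extracts the score-formula weights in mc-turn/src/victory.rs into named
SCORE_* constants (values unchanged) and records the new mc-economy and
mc-happiness dependency entries in Cargo.lock.

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>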
---
 src/simulator/Cargo.lock                      |  2 +
 .../mc-turn/src/gpu/fauna_encounter.wgsl      | 65 ++++++++++---------
 src/simulator/crates/mc-turn/src/gpu/mod.rs   | 60 +++++++++--------
 src/simulator/crates/mc-turn/src/victory.rs   | 41 ++++++++----
 4 files changed, 100 insertions(+), 68 deletions(-)
diff --git a/src/simulator/Cargo.lock b/src/simulator/Cargo.lock
index 7bb77e74..e86166f0 100644
--- a/src/simulator/Cargo.lock
+++ b/src/simulator/Cargo.lock
@@ -1029,6 +1029,8 @@ dependencies = [
  "mc-city",
  "mc-combat",
  "mc-core",
+ "mc-economy",
+ "mc-happiness",
  "pollster 0.3.0",
  "proptest",
  "serde",
diff --git a/src/simulator/crates/mc-turn/src/gpu/fauna_encounter.wgsl b/src/simulator/crates/mc-turn/src/gpu/fauna_encounter.wgsl
index 6b66634f..c7f963d5 100644
--- a/src/simulator/crates/mc-turn/src/gpu/fauna_encounter.wgsl
+++ b/src/simulator/crates/mc-turn/src/gpu/fauna_encounter.wgsl
@@ -3,11 +3,15 @@
 // Ports the inner RNG rolls of process_fauna_encounters_inner to GPU.
 // WGSL has no u64, so SplitMix64 is implemented via 32-bit schoolbook multiply.
 //
-// Input contract: units are sorted by tile_idx ascending within each player
-// (enforced by the Rust host) so RNG stream is byte-identical to the CPU path.
+// RNG determinism: the CPU path uses a single sequential per-player RNG stream —
+// each unit consumes the state left by the previous unit in sorted tile order.  To
+// match byte-for-byte, ONE thread processes all units of ONE player in a sequential
+// loop: one (1,1,1) dispatch = one player, selected by uniforms.player_idx (the
+// player bits in unit_meta are not read).  Parallelism across players can only come
+// from the host issuing one dispatch per player (separate encoder submissions).
 //
 // Bindings (group 0):
-//   0 unit_tiles    array<u32>  flat tile = col*H + row, per unit
+//   0 unit_tiles    array<u32>  flat tile = row*W + col (row-major, matches LairIndexCsr)
 //   1 unit_meta     array<u32>  bit0=fortified, bits1..7=player_idx
 //   2 player_rng    array<u32>  2 u32 per player (lo,hi of SplitMix64 state), read_write
 //   3 csr_offsets   array<u32>  CSR offsets, len = W*H + 1
@@ -25,7 +29,7 @@ struct Uniforms {
     tier_kill_exponent:    f32,
     fortify_divisor:       f32,
     encounter_probability: f32,
-    _pad:                  u32,
+    player_idx:            u32,
 }
 
 @group(0) @binding(0) var<storage, read>       unit_tiles:  array<u32>;
@@ -106,42 +110,45 @@ fn kill_probability(tier: u32, fortified: bool, u: Uniforms) -> f32 {
     return raw;
 }
 
-@compute @workgroup_size(64, 1, 1)
-fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
-    let uid = gid.x;
+// Single-threaded sequential loop over all units for one player.
+// Dispatched as (1, 1, 1) — no intra-workgroup parallelism on RNG state.
+@compute @workgroup_size(1, 1, 1)
+fn main() {
     let uni = uniforms[0];
-    if uid >= uni.n_units { return; }
-
-    let tile = unit_tiles[uid];
-    let um   = unit_meta[uid];
-    let pi   = (um >> 1u) & 0x7Fu;
-    let fort = (um & 1u) != 0u;
+    let pi  = uni.player_idx;
 
     var rlo = player_rng[pi * 2u];
     var rhi = player_rng[pi * 2u + 1u];
 
-    if tile >= uni.grid_width * uni.grid_height {
-        kill_flags[uid] = 0u;
-        return;
-    }
+    for (var uid = 0u; uid < uni.n_units; uid++) {
+        let tile = unit_tiles[uid];
+        let um   = unit_meta[uid];
+        let fort = (um & 1u) != 0u;
 
-    let start = csr_offsets[tile];
-    let end   = csr_offsets[tile + 1u];
-    var killed = false;
+        if tile >= uni.grid_width * uni.grid_height {
+            kill_flags[uid] = 0u;
+            continue;
+        }
 
-    for (var p = start; p < end; p++) {
-        let tier = lair_tiers[csr_lairs[p]];
+        let start = csr_offsets[tile];
+        let end   = csr_offsets[tile + 1u];
+        var killed = false;
 
-        // Encounter gate roll
-        let s1 = smix_step(rlo, rhi); rlo = s1.x; rhi = s1.y;
-        if rand_f32(rhi) > uni.encounter_probability { continue; }
+        for (var p = start; p < end; p++) {
+            let tier = lair_tiers[csr_lairs[p]];
 
-        // Kill roll
-        let s2 = smix_step(rlo, rhi); rlo = s2.x; rhi = s2.y;
-        if rand_f32(rhi) <= kill_probability(tier, fort, uni) { killed = true; }
+            // Encounter gate roll
+            let s1 = smix_step(rlo, rhi); rlo = s1.x; rhi = s1.y;
+            if rand_f32(rhi) > uni.encounter_probability { continue; }
+
+            // Kill roll
+            let s2 = smix_step(rlo, rhi); rlo = s2.x; rhi = s2.y;
+            if rand_f32(rhi) <= kill_probability(tier, fort, uni) { killed = true; }
+        }
+
+        kill_flags[uid] = select(0u, 1u, killed);
     }
 
     player_rng[pi * 2u]      = rlo;
     player_rng[pi * 2u + 1u] = rhi;
-    kill_flags[uid] = select(0u, 1u, killed);
 }
diff --git a/src/simulator/crates/mc-turn/src/gpu/mod.rs b/src/simulator/crates/mc-turn/src/gpu/mod.rs
index 0859efdf..ca2e304e 100644
--- a/src/simulator/crates/mc-turn/src/gpu/mod.rs
+++ b/src/simulator/crates/mc-turn/src/gpu/mod.rs
@@ -26,7 +26,7 @@ mod inner {
     /// Minimal per-unit data needed by the shader.
     #[derive(Clone, Debug)]
     pub struct GpuUnit {
-        /// Flat tile index: col * grid_height + row
+        /// Flat tile index: row * grid_width + col  (row-major, matches LairIndexCsr)
         pub tile_idx: u32,
         /// bit0 = is_fortified, bits 1..7 = player_index (max 127 players)
         pub meta: u32,
@@ -44,7 +44,7 @@ mod inner {
         tier_kill_exponent:    f32,
         fortify_divisor:       f32,
         encounter_probability: f32,
-        _pad0:                 u32,
+        player_idx:            u32,
     }
 
     /// Lazy-initialized wgpu context. Created once, reused across dispatches.
@@ -167,7 +167,7 @@ mod inner {
                 tier_kill_exponent:    cfg.tier_kill_exponent,
                 fortify_divisor:       cfg.fortify_divisor,
                 encounter_probability: cfg.encounter_probability_per_turn,
-                _pad0:                 0,
+                player_idx:            player_idx as u32,
             };
 
             // Upload buffers
@@ -195,7 +195,6 @@ mod inner {
                 ],
             });
 
-            let workgroups = (n + 63) / 64;
             let mut encoder = dev.create_command_encoder(&wgpu::CommandEncoderDescriptor {
                 label: Some("fauna_enc"),
             });
@@ -206,7 +205,7 @@ mod inner {
                 });
                 pass.set_pipeline(&self.pipeline);
                 pass.set_bind_group(0, &bind_group, &[]);
-                pass.dispatch_workgroups(workgroups, 1, 1);
+                pass.dispatch_workgroups(1, 1, 1);
             }
 
             // Readback: copy kill_flags to a MAP_READ staging buffer.
@@ -436,7 +435,7 @@ mod inner {
                     // Sort units by tile_idx ascending (required for RNG determinism)
                     let mut gpu_units: Vec<GpuUnit> = gpu_state.players[pi].units.iter()
                         .map(|u| {
-                            let tile = u.col * H + u.row;
+                            let tile = u.row * W + u.col;
                             let meta = (pi as u32) << 1 | (u.is_fortified as u32);
                             GpuUnit { tile_idx: tile as u32, meta }
                         })
@@ -462,8 +461,12 @@ mod inner {
                 "GPU kill_flags must be byte-identical to CPU across {TURNS} turns with seed={SEED}");
         }
 
-        /// Scalar parity: smix_step(lo, hi) == hash_mix(state, SALT) for N iterations.
-        /// Catches any constant or bit-shift mismatch between CPU and WGSL.
+        /// Scalar parity: one GPU smix_step must equal one CPU hash_mix(state, SALT) call.
+        ///
+        /// Uses encounter_probability=1.0 and base_kill_rate=0.0 so each dispatch
+        /// executes exactly 2 smix_steps (encounter gate + kill roll, kill never fires).
+        /// CPU mirrors the same 2 rand_unit calls.  16 iterations catch any constant
+        /// or bit-shift divergence early.
         #[test]
         fn smix_step_matches_cpu_hash_mix() {
             let ctx = match GpuContext::try_init() {
@@ -474,31 +477,33 @@ mod inner {
                 }
             };
 
-            // Drive 16 single-unit dispatches with no lairs so the only RNG output
-            // is the unchanged rng_state readback.  We verify state evolves identically.
-            let cfg = LairCombatConfig::default();
-            let empty_csr = LairIndexCsr { offsets: vec![0u32; 2], flat_lair_ids: vec![] };
-            let lair_tiers: Vec<u32> = vec![];
+            // encounter_probability=1.0 → gate always passes (2 steps per lair)
+            // base_kill_rate=0.0       → kill never fires, unit survives
+            let cfg = LairCombatConfig {
+                encounter_probability_per_turn: 1.0,
+                base_kill_rate: 0.0,
+                ..LairCombatConfig::default()
+            };
+            // One lair on tile 0 (grid 2×1 so tile 0 exists)
+            let one_lair_csr = LairIndexCsr {
+                offsets: vec![0u32, 1u32, 1u32],
+                flat_lair_ids: vec![0u32],
+            };
+            let one_tier: Vec<u32> = vec![1u32];
+            let unit = GpuUnit { tile_idx: 0, meta: 0 };
 
             let mut cpu_state: u64 = 0xDEAD_C0DE_1234_5678;
             let mut gpu_state: u64 = cpu_state;
 
             for _ in 0..16 {
-                // CPU: one rand_unit call = one hash_mix step
-                let (_, next) = rand_unit(cpu_state);
-                cpu_state = next;
+                // CPU: 2 rand_unit steps (encounter gate + kill roll)
+                let (_, s1) = rand_unit(cpu_state);
+                let (_, s2) = rand_unit(s1);
+                cpu_state = s2;
 
-                // GPU: dispatch a single unit on a tile with no lairs.
-                // The kernel still calls smix_step once for the (skipped) encounter roll.
-                let unit = GpuUnit { tile_idx: 0, meta: 0 };
-                // Patch csr so tile 0 has one lair entry (forcing one encounter roll).
-                let one_lair_csr = LairIndexCsr {
-                    offsets: vec![0u32, 1u32, 1u32],
-                    flat_lair_ids: vec![0u32],
-                };
-                let one_tier: Vec<u32> = vec![1u32];
+                // GPU: dispatch one unit — kernel runs 2 smix_steps for the same lair
                 ctx.dispatch_player_fauna(
-                    &[unit],
+                    &[unit.clone()],
                     0,
                     &mut gpu_state,
                     &one_lair_csr,
@@ -509,9 +514,8 @@ mod inner {
                 );
             }
 
-            // After 16 identical steps the states must agree.
             assert_eq!(cpu_state, gpu_state,
-                "smix_step must produce byte-identical output to CPU hash_mix after 16 steps");
+                "smix_step must be byte-identical to CPU hash_mix after 16×2 steps");
         }
 
         /// Fallback: when no GPU is available, dispatch returns empty without panicking.
diff --git a/src/simulator/crates/mc-turn/src/victory.rs b/src/simulator/crates/mc-turn/src/victory.rs
index 2b83b763..4f05601d 100644
--- a/src/simulator/crates/mc-turn/src/victory.rs
+++ b/src/simulator/crates/mc-turn/src/victory.rs
@@ -39,16 +39,31 @@ impl VictoryType {
 
 // ── Score formula ────────────────────────────────────────────────────────────
 //
-// Weights (tuneable — start conservative to avoid over-rewarding a single axis):
-//   cities     × 50   — settled territory is the backbone of any lead
-//   population × 5    — sustained growth beats quick burst expansion
-//   techs      × 10   — research is a multiplier on every other axis
-//   gold       / 10   — wealth matters but liquid gold decays quickly
-//   units      × 2    — surviving military signals combat power, not kills
-//
-// No wonders_built term: PlayerState carries no wonder count; the GDScript
-// layer can augment the displayed score but the Rust tiebreak runs on the
-// above fields only.
+// Tuneable per-axis weights; the rationale for each value is documented on its
+// constant below. Overall, scoring rewards durable sources of power (territory,
+// population, research) more than liquid or ephemeral ones (gold, individual
+// units). There is no wonders_built term because PlayerState carries no wonder
+// count; the GDScript layer can augment the displayed score, but the Rust
+// tiebreak runs on these fields only.
+
+/// Points per city owned — settled territory is the backbone of any lead.
+pub const SCORE_WEIGHT_PER_CITY: i64 = 50;
+
+/// Points per population unit — sustained growth beats quick burst expansion.
+pub const SCORE_WEIGHT_PER_POP: i64 = 5;
+
+/// Points per researched tech — research multiplies every other axis.
+pub const SCORE_WEIGHT_PER_TECH: i64 = 10;
+
+/// Divisor on treasury gold (integer division; must stay non-zero) — wealth
+/// matters but liquid gold decays quickly, so dividing it down keeps a merchant
+/// player from running away with the score on a single well-played gold turn.
+pub const SCORE_GOLD_DIVISOR: i64 = 10;
+
+/// Points per surviving unit (every entry in `player.units` is counted) — a
+/// surviving military signals combat power. Intentionally low vs cities because
+/// a large but idle army isn't a lead on its own.
+pub const SCORE_WEIGHT_PER_UNIT: i64 = 2;
 
 /// Compute a deterministic score for a single player.
 /// Used by `check_score_victory` and exposed for testing.
@@ -63,7 +78,11 @@ pub fn calculate_score(player: &PlayerState) -> i64 {
     let unit_count = player.units.len() as i64;
     let gold = player.gold.max(0) as i64;
 
-    city_count * 50 + pop_total * 5 + tech_count * 10 + gold / 10 + unit_count * 2
+    city_count * SCORE_WEIGHT_PER_CITY
+        + pop_total * SCORE_WEIGHT_PER_POP
+        + tech_count * SCORE_WEIGHT_PER_TECH
+        + gold / SCORE_GOLD_DIVISOR
+        + unit_count * SCORE_WEIGHT_PER_UNIT
 }
 
 /// Tiebreak at max-turns: player with highest score wins.