deps-upgrade(mc-compute): ⬆️ Pin or upgrade mc-compute dependency for performance/stability improvements

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-03-31 22:47:33 -07:00 · 2026-03-31 22:47:33 -07:00 · a1c72d97e7
commit a1c72d97e7
parent e1a5bc319e
9 changed files with 1431 additions and 0 deletions
--- a/src/simulator/crates/mc-compute/Cargo.toml
+++ b/src/simulator/crates/mc-compute/Cargo.toml
@ -0,0 +1,22 @@
+[package]
+name = "mc-compute"
+version = "0.1.0"
+edition = "2021"
+
+# Backend selection. `cpu` is the default and enables no optional dependencies.
+[features]
+default = ["cpu"]
+cpu = []
+# `gpu` enables the optional wgpu, pollster and bytemuck dependencies.
+gpu = ["dep:wgpu", "dep:pollster", "dep:bytemuck"]
+# `parallel` enables the optional rayon dependency.
+parallel = ["dep:rayon"]
+
+[dependencies]
+mc-core    = { path = "../mc-core" }
+mc-climate = { path = "../mc-climate" }
+
+wgpu     = { version = "24", optional = true }
+pollster = { version = "0.4", optional = true }
+bytemuck = { version = "1", features = ["derive"], optional = true }
+rayon    = { version = "1.10", optional = true }
+
+[dev-dependencies]
+serde_json = "1"
--- a/src/simulator/crates/mc-compute/src/cpu.rs
+++ b/src/simulator/crates/mc-compute/src/cpu.rs
@ -0,0 +1,129 @@
+//! CPU-parallel climate processing via rayon.
+//!
+//! The two hot-path diffusion loops (temperature, moisture) are extracted as
+//! focused functions that iterate over tile indices with rayon's
+//! `into_par_iter`. All other steps delegate to the existing single-threaded
+//! `ClimatePhysics` methods.
+
+#[cfg(feature = "parallel")]
+use mc_climate::{ClimatePhysics, FLAG_IS_ELEVATED};
+#[cfg(feature = "parallel")]
+use mc_core::grid::GridState;
+
+/// Run a full climate step with rayon parallelism on the diffusion passes.
+///
+/// Steps that are inherently sequential (river transport, lake chains, terrain
+/// evolution) stay single-threaded. Only the two O(n) stencil passes are parallelised.
+///
+/// * `physics` – climate model; its tile cache is rebuilt at the start of the step.
+/// * `grid` – tile state, updated in place.
+/// * `turn` – forwarded to the orbital-forcing step.
+/// * `dt` – time-step scale forwarded to every `dt`-aware sub-step.
+#[cfg(feature = "parallel")]
+pub fn process_step_parallel(physics: &mut ClimatePhysics, grid: &mut GridState, turn: u32, dt: f32) {
+    // The cache is rebuilt first; the parallel passes below read albedo, flags
+    // and evapotranspiration from `physics`.
+    physics.rebuild_tile_cache(grid);
+    physics.apply_orbital_forcing(grid, turn);
+    physics.apply_aerosol_forcing(grid);
+    // Parallel pass 1: temperature.
+    temperature_diffusion_parallel(physics, grid, dt);
+    physics.update_lake_thermal_effects(grid, dt);
+    // Parallel pass 2: moisture. It reads `tile.temperature`, so it must run
+    // after the temperature pass and the lake thermal update.
+    moisture_diffusion_parallel(physics, grid, dt);
+    // Everything after the moisture pass is delegated to `ClimatePhysics`.
+    physics.step_remaining(grid, dt);
+}
+
+/// Rayon-parallel temperature pass (described upstream as a port of
+/// `ClimatePhysics::update_temperatures`).
+///
+/// Each tile's new value is derived from a snapshot of the pre-step
+/// temperatures, so the per-tile computations never observe each other's
+/// output. Results are collected first and copied into
+/// `grid.tiles[i].temperature` only after the parallel section has finished.
+///
+/// * `physics` – source of tunable parameters and the per-tile albedo cache.
+/// * `grid` – tiles are read during the parallel section, then overwritten.
+/// * `dt` – time-step scale; multiplied into conductivity, energy scale and
+///   relaxation (the first and last are capped at 0.9 after scaling).
+#[cfg(feature = "parallel")]
+fn temperature_diffusion_parallel(physics: &ClimatePhysics, grid: &mut GridState, dt: f32) {
+    use mc_core::algorithms::hex;
+    use rayon::prelude::*;
+
+    let wind_gain = (physics.get_param("wind_conductivity", 0.1) as f32 * dt).min(0.9);
+    let solar_gain = physics.get_param("energy_scale", 0.005) as f32 * dt;
+    let relax_rate = (physics.get_param("equilibrium_relaxation", 0.08) as f32 * dt).min(0.9);
+    let solar_lo = physics.get_param("solar_min", 0.05) as f32;
+    let solar_hi = physics.get_param("solar_max", 0.70) as f32;
+
+    let (cols, rows) = (grid.width, grid.height);
+
+    // One solar value per row, computed once instead of once per tile.
+    let row_solar: Vec<f32> = (0..rows)
+        .map(|r| hex::solar_by_row(r, rows, solar_lo, solar_hi))
+        .collect();
+
+    // Pre-step temperatures; the only temperature data the workers read.
+    let before: Vec<f32> = grid.tiles.iter().map(|t| t.temperature).collect();
+    let albedo = physics.tile_albedo_ref();
+
+    let step_tile = |i: usize| -> f32 {
+        let tile = &grid.tiles[i];
+        let solar = row_solar[tile.row.clamp(0, rows - 1) as usize];
+        let here = before[i];
+        let absorbed = solar * (1.0 - albedo[i]) * solar_gain;
+
+        // Heat carried in from the upwind neighbour, if there is one.
+        let advected = match hex::upwind_offset(tile.col, tile.row, tile.wind_direction, cols, rows) {
+            Some((uc, ur)) => {
+                let upwind = (ur * cols + uc) as usize;
+                (before[upwind] - here) * tile.wind_speed * wind_gain
+            }
+            None => 0.0f32,
+        };
+
+        let pull = (solar - here) * relax_rate;
+        (here + absorbed + advected + pull + tile.magic_heat_delta).clamp(0.0, 1.0)
+    };
+
+    let after: Vec<f32> = (0..grid.tiles.len())
+        .into_par_iter()
+        .map(step_tile)
+        .collect();
+
+    for (tile, temp) in grid.tiles.iter_mut().zip(after) {
+        tile.temperature = temp;
+    }
+}
+
+/// Rayon-parallel moisture pass (described upstream as a port of
+/// `ClimatePhysics::update_moisture_wind`).
+///
+/// Works from a snapshot of `moisture * decay`, where `decay` is the
+/// `moisture_decay` parameter raised to the power `dt`. Each tile adds moisture
+/// carried from its upwind neighbour (attenuated when that neighbour is flagged
+/// elevated), adds cached evapotranspiration and the tile's
+/// `magic_moisture_delta`, subtracts a temperature-weighted atmospheric loss,
+/// and clamps to `[0, 1]`. Results are written to `grid.tiles[i].moisture`
+/// after the parallel section has finished.
+#[cfg(feature = "parallel")]
+fn moisture_diffusion_parallel(physics: &ClimatePhysics, grid: &mut GridState, dt: f32) {
+    use mc_core::algorithms::hex;
+    use rayon::prelude::*;
+
+    let carry_rate = (physics.get_param("moisture_transport", 0.15) as f32 * dt).min(0.9);
+    let decay = (physics.get_param("moisture_decay", 0.995) as f32).powf(dt);
+    let shadow_block = physics.get_param("mountain_rain_shadow_block", 0.9) as f32;
+    let loss_rate = (physics.get_param("atmospheric_loss_rate", 0.0003) as f32 * dt).min(0.9);
+
+    let (cols, rows) = (grid.width, grid.height);
+
+    // Decayed pre-step moisture; the only moisture data the workers read.
+    let before: Vec<f32> = grid.tiles.iter().map(|t| t.moisture * decay).collect();
+    let flags = physics.tile_flags_ref();
+    let evapotrans = physics.tile_evapotrans_ref();
+
+    let step_tile = |i: usize| -> f32 {
+        let tile = &grid.tiles[i];
+        let here = before[i];
+
+        let carried = match hex::upwind_offset(tile.col, tile.row, tile.wind_direction, cols, rows) {
+            Some((uc, ur)) => {
+                let upwind = (ur * cols + uc) as usize;
+                // An elevated upwind tile blocks part of the incoming moisture.
+                let block = if flags[upwind] & FLAG_IS_ELEVATED != 0 {
+                    shadow_block
+                } else {
+                    0.0
+                };
+                before[upwind] * tile.wind_speed * carry_rate * (1.0 - block)
+            }
+            None => 0.0f32,
+        };
+
+        let lost = here * loss_rate * tile.temperature;
+        (here + carried + evapotrans[i] - lost + tile.magic_moisture_delta).clamp(0.0, 1.0)
+    };
+
+    let after: Vec<f32> = (0..grid.tiles.len())
+        .into_par_iter()
+        .map(step_tile)
+        .collect();
+
+    for (tile, moist) in grid.tiles.iter_mut().zip(after) {
+        tile.moisture = moist;
+    }
+}
--- a/src/simulator/crates/mc-compute/src/gpu/buffers.rs
+++ b/src/simulator/crates/mc-compute/src/gpu/buffers.rs
@ -0,0 +1,257 @@
+//! GPU buffer layout and data transfer for climate compute shaders.
+//!
+//! Uses Structure-of-Arrays (SoA) layout — only the ~15 fields needed by the
+//! hot-path shaders are uploaded, not all 134 TileState fields.
+
+use mc_core::algorithms::hex;
+use mc_core::grid::GridState;
+use wgpu::util::DeviceExt;
+
+/// Build the flat neighbor table for a `width` × `height` grid.
+///
+/// The result has six slots per tile: slot `i * 6 + dir` holds the flat index
+/// (`row * width + col`) of tile `i`'s neighbor in direction `dir`, or
+/// `0xFFFFFFFF` when that neighbor would fall outside the grid. Neighbor offsets
+/// come from `hex::ODD_Q_NEIGHBORS`, selected by column parity.
+///
+/// Intended to be built once per map so shaders can look neighbors up instead
+/// of doing hex coordinate math.
+pub fn build_neighbor_lut(width: i32, height: i32) -> Vec<u32> {
+    let tile_total = (width * height) as usize;
+    let mut table = vec![0xFFFF_FFFFu32; tile_total * 6];
+
+    for row in 0..height {
+        for col in 0..width {
+            let base = (row * width + col) as usize * 6;
+            let parity = (col & 1) as usize;
+            for dir in 0..6 {
+                let (dc, dr) = hex::ODD_Q_NEIGHBORS[parity][dir];
+                let (nc, nr) = (col + dc, row + dr);
+                let inside = (0..width).contains(&nc) && (0..height).contains(&nr);
+                if inside {
+                    table[base + dir] = (nr * width + nc) as u32;
+                }
+            }
+        }
+    }
+
+    table
+}
+
+/// GPU buffers for climate simulation.
+///
+/// Per-tile buffer contents follow the `pack_*` helpers in this module; all
+/// buffers are allocated in `ClimateBuffers::new`.
+pub struct ClimateBuffers {
+    /// Per-tile `vec4<f32>`: (temperature, moisture, wind_speed, magic_heat_delta).
+    pub(crate) climate_core: wgpu::Buffer,
+    /// Per-tile `vec4<f32>`: (elevation, magic_moisture_delta, albedo, evapotranspiration).
+    pub(crate) climate_aux: wgpu::Buffer,
+    /// Per-tile `vec4<u32>`: (wind_direction, tile_flags, col, row).
+    pub(crate) climate_int: wgpu::Buffer,
+    /// Six neighbor indices per tile, as produced by `build_neighbor_lut`.
+    pub(crate) neighbor_lut: wgpu::Buffer,
+    /// One `f32` solar value per grid row.
+    pub(crate) solar_row: wgpu::Buffer,
+    /// Temperature output: one `f32` per tile, copyable to the staging buffer.
+    pub(crate) out_temp: wgpu::Buffer,
+    /// Moisture output: one `f32` per tile, copyable to the staging buffer.
+    pub(crate) out_moisture: wgpu::Buffer,
+    /// Staging buffer for reading results back to CPU.
+    pub(crate) readback_staging: wgpu::Buffer,
+    /// Number of tiles the buffers were sized for.
+    pub(crate) tile_count: u32,
+}
+
+impl ClimateBuffers {
+    /// Create every buffer needed for a grid of `grid.tiles.len()` tiles.
+    ///
+    /// The neighbor table is built from `grid.width` / `grid.height` and
+    /// uploaded at creation time; it has no `COPY_DST` usage, so it cannot be
+    /// rewritten afterwards. The three per-tile input buffers and the per-row
+    /// solar buffer are created empty and filled later through the `upload_*`
+    /// methods. The two output buffers are copy sources for the mappable
+    /// staging buffer.
+    pub fn new(device: &wgpu::Device, grid: &GridState) -> Self {
+        let n = grid.tiles.len();
+
+        // Static neighbor table, initialised with its final contents.
+        let lut = build_neighbor_lut(grid.width, grid.height);
+        let neighbor_lut = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
+            label: Some("neighbor_lut"),
+            contents: bytemuck::cast_slice(&lut),
+            usage: wgpu::BufferUsages::STORAGE,
+        });
+
+        // All remaining buffers differ only in label, size and usage.
+        let alloc = |label: &'static str, size: u64, usage: wgpu::BufferUsages| {
+            device.create_buffer(&wgpu::BufferDescriptor {
+                label: Some(label),
+                size,
+                usage,
+                mapped_at_creation: false,
+            })
+        };
+
+        let shader_input = wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST;
+        let shader_output = wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC;
+        let cpu_readable = wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST;
+
+        let vec4_bytes = (n * 16) as u64; // vec4<f32> and vec4<u32> are both 16 bytes
+        let scalar_bytes = (n * 4) as u64; // one f32 per tile
+        let solar_bytes = u64::from(grid.height as u32) * 4; // one f32 per row
+
+        let climate_core = alloc("climate_core", vec4_bytes, shader_input);
+        let climate_aux = alloc("climate_aux", vec4_bytes, shader_input);
+        let climate_int = alloc("climate_int", vec4_bytes, shader_input);
+        let solar_row = alloc("solar_row", solar_bytes, shader_input);
+        let out_temp = alloc("out_temp", scalar_bytes, shader_output);
+        let out_moisture = alloc("out_moisture", scalar_bytes, shader_output);
+        let readback_staging = alloc("readback_staging", scalar_bytes, cpu_readable);
+
+        Self {
+            climate_core,
+            climate_aux,
+            climate_int,
+            neighbor_lut,
+            solar_row,
+            out_temp,
+            out_moisture,
+            readback_staging,
+            tile_count: n as u32,
+        }
+    }
+
+    /// Refresh all three per-tile input buffers from the current grid state.
+    ///
+    /// `climate_core` is written first (moisture uploaded as-is, i.e. with a
+    /// decay factor of 1.0), followed by `climate_aux` and `climate_int`.
+    /// The slices are indexed by tile position and must cover every tile in
+    /// `grid`, otherwise packing panics.
+    pub fn upload_core_aux_int(
+        &self,
+        queue: &wgpu::Queue,
+        grid: &GridState,
+        tile_albedo: &[f32],
+        tile_evapotrans: &[f32],
+        tile_flags: &[u8],
+    ) {
+        self.upload_core(queue, grid);
+        self.upload_aux_int(queue, grid, tile_albedo, tile_evapotrans, tile_flags);
+    }
+
+    /// Write `climate_core` only: vec4(temperature, moisture, wind_speed, magic_heat_delta).
+    ///
+    /// Moisture is uploaded exactly as stored in the grid (decay factor 1.0), so
+    /// this replaces any previously uploaded pre-decayed moisture.
+    pub fn upload_core(&self, queue: &wgpu::Queue, grid: &GridState) {
+        let packed = pack_core(grid, 1.0);
+        queue.write_buffer(&self.climate_core, 0, bytemuck::cast_slice(&packed));
+    }
+
+    /// Write `climate_core` with each tile's moisture multiplied by `decay`.
+    ///
+    /// The other three components (temperature, wind_speed, magic_heat_delta)
+    /// are uploaded unchanged from the grid. Intended to run before the
+    /// moisture shader, which reads the pre-decayed value from `.y`.
+    pub fn upload_core_decayed_moisture(
+        &self,
+        queue: &wgpu::Queue,
+        grid: &GridState,
+        decay: f32,
+    ) {
+        let packed = pack_core(grid, decay);
+        queue.write_buffer(&self.climate_core, 0, bytemuck::cast_slice(&packed));
+    }
+
+    /// Write `climate_aux` and `climate_int`, leaving `climate_core` untouched.
+    ///
+    /// Because `climate_core` is not written, moisture previously uploaded via
+    /// `upload_core_decayed_moisture` stays in place. The slices are indexed by
+    /// tile position and must cover every tile in `grid`.
+    pub fn upload_aux_int(
+        &self,
+        queue: &wgpu::Queue,
+        grid: &GridState,
+        tile_albedo: &[f32],
+        tile_evapotrans: &[f32],
+        tile_flags: &[u8],
+    ) {
+        let aux_words = pack_aux(grid, tile_albedo, tile_evapotrans);
+        queue.write_buffer(&self.climate_aux, 0, bytemuck::cast_slice(&aux_words));
+
+        let int_words = pack_int(grid, tile_flags);
+        queue.write_buffer(&self.climate_int, 0, bytemuck::cast_slice(&int_words));
+    }
+
+    /// Compute one solar value per row with `hex::solar_by_row` and write the
+    /// table to the `solar_row` buffer. Meant to run before each temperature
+    /// dispatch.
+    pub fn upload_solar(
+        &self,
+        queue: &wgpu::Queue,
+        height: i32,
+        solar_min: f32,
+        solar_max: f32,
+    ) {
+        let mut per_row: Vec<f32> = Vec::with_capacity(height.max(0) as usize);
+        for row in 0..height {
+            per_row.push(hex::solar_by_row(row, height, solar_min, solar_max));
+        }
+        queue.write_buffer(&self.solar_row, 0, bytemuck::cast_slice(&per_row));
+    }
+}
+
+// ── Pure packing functions (testable without GPU) ─────────────────────────────
+//
+// Each returns a flat Vec in the exact field order expected by the WGSL shaders.
+// Correctness of these functions is the correctness of the GPU data path.
+
+/// Flatten the grid into the `climate_core` layout: four floats per tile, in
+/// the order (temperature, moisture * `moisture_decay`, wind_speed, magic_heat_delta).
+///
+/// Callers that want moisture uploaded unchanged pass `moisture_decay = 1.0`.
+pub(crate) fn pack_core(grid: &GridState, moisture_decay: f32) -> Vec<f32> {
+    grid.tiles
+        .iter()
+        .flat_map(|tile| {
+            [
+                tile.temperature,
+                tile.moisture * moisture_decay,
+                tile.wind_speed,
+                tile.magic_heat_delta,
+            ]
+        })
+        .collect()
+}
+
+/// Flatten the grid into the `climate_aux` layout: four floats per tile, in the
+/// order (elevation, magic_moisture_delta, albedo, evapotranspiration).
+///
+/// `tile_albedo` and `tile_evapotrans` are indexed by tile position; indexing
+/// panics if either slice is shorter than `grid.tiles`.
+pub(crate) fn pack_aux(
+    grid: &GridState,
+    tile_albedo: &[f32],
+    tile_evapotrans: &[f32],
+) -> Vec<f32> {
+    grid.tiles
+        .iter()
+        .enumerate()
+        .flat_map(|(i, tile)| {
+            [
+                tile.elevation,
+                tile.magic_moisture_delta,
+                tile_albedo[i],
+                tile_evapotrans[i],
+            ]
+        })
+        .collect()
+}
+
+/// Flatten the grid into the `climate_int` layout: four `u32` per tile, in the
+/// order (wind_direction, tile_flags, col, row).
+///
+/// `tile_flags` is indexed by tile position; indexing panics if it is shorter
+/// than `grid.tiles`. All four values are converted with `as u32`.
+pub(crate) fn pack_int(grid: &GridState, tile_flags: &[u8]) -> Vec<u32> {
+    grid.tiles
+        .iter()
+        .enumerate()
+        .flat_map(|(i, tile)| {
+            [
+                tile.wind_direction as u32,
+                tile_flags[i] as u32,
+                tile.col as u32,
+                tile.row as u32,
+            ]
+        })
+        .collect()
+}
--- a/src/simulator/crates/mc-compute/src/gpu/mod.rs
+++ b/src/simulator/crates/mc-compute/src/gpu/mod.rs
@ -0,0 +1,256 @@
+//! GPU compute backend via wgpu.
+//!
+//! Handles device initialization, buffer management, and shader dispatch for
+//! the two hot-path climate diffusion passes (temperature + moisture).
+
+pub mod buffers;
+pub mod pipelines;
+
+use buffers::ClimateBuffers;
+use mc_climate::ClimatePhysics;
+use mc_core::grid::GridState;
+use pipelines::{ClimatePipelines, MoistureParams, TempParams};
+use wgpu::util::DeviceExt;
+
+/// GPU context: device, queue, compiled pipelines, and allocated buffers.
+///
+/// Built by `GpuContext::init`; the buffers are sized for the grid passed to
+/// `init`.
+pub struct GpuContext {
+    /// Logical device used to create buffers, bind groups and encoders.
+    device: wgpu::Device,
+    /// Queue used for buffer uploads and command submission.
+    queue: wgpu::Queue,
+    /// Compiled temperature and moisture compute pipelines plus their layouts.
+    pipelines: ClimatePipelines,
+    /// Input, output and staging buffers shared by both passes.
+    buffers: ClimateBuffers,
+}
+
+impl GpuContext {
+    /// Try to bring up GPU compute for `grid`.
+    ///
+    /// Requests a high-performance adapter on the Vulkan, Metal, DX12 or
+    /// browser WebGPU backends, then a device with default limits and no
+    /// optional features. Returns `None` when no adapter is available or the
+    /// device request fails (e.g. headless server, unsupported platform).
+    /// On success the pipelines are compiled and the buffers are allocated
+    /// for `grid`. Both requests are driven to completion with
+    /// `pollster::block_on`, so this call blocks.
+    pub fn init(grid: &GridState) -> Option<Self> {
+        let backends = wgpu::Backends::VULKAN
+            | wgpu::Backends::METAL
+            | wgpu::Backends::DX12
+            | wgpu::Backends::BROWSER_WEBGPU;
+        let instance = wgpu::Instance::new(&wgpu::InstanceDescriptor {
+            backends,
+            ..Default::default()
+        });
+
+        let adapter_options = wgpu::RequestAdapterOptions {
+            power_preference: wgpu::PowerPreference::HighPerformance,
+            compatible_surface: None,
+            force_fallback_adapter: false,
+        };
+        let adapter = pollster::block_on(instance.request_adapter(&adapter_options))?;
+
+        let device_descriptor = wgpu::DeviceDescriptor {
+            label: Some("mc-compute"),
+            required_features: wgpu::Features::empty(),
+            required_limits: wgpu::Limits::default(),
+            memory_hints: Default::default(),
+        };
+        let (device, queue) =
+            match pollster::block_on(adapter.request_device(&device_descriptor, None)) {
+                Ok(pair) => pair,
+                Err(_) => return None,
+            };
+
+        let pipelines = ClimatePipelines::new(&device);
+        let buffers = ClimateBuffers::new(&device, grid);
+
+        Some(Self {
+            device,
+            queue,
+            pipelines,
+            buffers,
+        })
+    }
+
+    /// Dispatch the temperature diffusion compute shader and return per-tile results.
+    ///
+    /// Pre-conditions: `buffers.upload_core_aux_int` and `buffers.upload_solar` must
+    /// have been called with current grid state.
+    ///
+    /// `dt` is folded into the uniform parameters on the CPU; conductivity and
+    /// relaxation are capped at 0.9 after scaling. The call blocks until the
+    /// output buffer has been copied back (see `readback`) and returns one `f32`
+    /// per tile.
+    fn dispatch_temperature(&mut self, physics: &ClimatePhysics, dt: f32) -> Vec<f32> {
+        let tile_count = self.buffers.tile_count;
+        // Round up to whole groups of 64 invocations.
+        // NOTE(review): assumes the temperature shader declares
+        // `@workgroup_size(64)` and bounds-checks against `tile_count` — confirm
+        // against temperature_diffusion.wgsl.
+        let workgroups = (tile_count + 63) / 64;
+
+        let params = TempParams {
+            conductivity: (physics.get_param("wind_conductivity", 0.1) as f32 * dt).min(0.9),
+            energy_scale: physics.get_param("energy_scale", 0.005) as f32 * dt,
+            relaxation: (physics.get_param("equilibrium_relaxation", 0.08) as f32 * dt).min(0.9),
+            tile_count,
+        };
+        // A fresh uniform buffer is created for every dispatch.
+        let params_buf = self
+            .device
+            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
+                label: Some("temp_params"),
+                contents: bytemuck::bytes_of(&params),
+                usage: wgpu::BufferUsages::UNIFORM,
+            });
+
+        let bind_group =
+            self.pipelines
+                .create_temp_bind_group(&self.device, &self.buffers, &params_buf);
+
+        let mut encoder = self
+            .device
+            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
+                label: Some("temp_dispatch"),
+            });
+        {
+            // Scoped so the pass is dropped (ended) before `encoder.finish()`.
+            let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
+                label: Some("temperature_diffusion"),
+                timestamp_writes: None,
+            });
+            pass.set_pipeline(&self.pipelines.temperature);
+            pass.set_bind_group(0, &bind_group, &[]);
+            pass.dispatch_workgroups(workgroups, 1, 1);
+        }
+        self.queue.submit(std::iter::once(encoder.finish()));
+
+        // Copy `out_temp` into the shared staging buffer and map it on the CPU.
+        readback(
+            &self.device,
+            &self.queue,
+            &self.buffers.out_temp,
+            &self.buffers.readback_staging,
+            tile_count,
+        )
+    }
+
+    /// Dispatch the moisture wind compute shader and return per-tile results.
+    ///
+    /// Pre-conditions: `buffers.upload_core_decayed_moisture` and
+    /// `buffers.upload_aux_int` must have been called with current grid state.
+    ///
+    /// `dt` is folded into `transport_rate` and `atmo_loss` on the CPU (both
+    /// capped at 0.9 after scaling); `rain_shadow_block` is passed through
+    /// unscaled. Moisture decay is NOT applied here — the shader expects it to
+    /// be baked into the uploaded `climate_core.y`. The call blocks until the
+    /// output buffer has been copied back (see `readback`).
+    fn dispatch_moisture(&mut self, physics: &ClimatePhysics, dt: f32) -> Vec<f32> {
+        let tile_count = self.buffers.tile_count;
+        // Round up to whole groups of 64 invocations, matching the shader's
+        // `@workgroup_size(64)`; surplus invocations return early on
+        // `i >= params.tile_count`.
+        let workgroups = (tile_count + 63) / 64;
+
+        let params = MoistureParams {
+            transport_rate: (physics.get_param("moisture_transport", 0.15) as f32 * dt).min(0.9),
+            rain_shadow_block: physics.get_param("mountain_rain_shadow_block", 0.9) as f32,
+            atmo_loss: (physics.get_param("atmospheric_loss_rate", 0.0003) as f32 * dt).min(0.9),
+            tile_count,
+        };
+        // A fresh uniform buffer is created for every dispatch.
+        let params_buf = self
+            .device
+            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
+                label: Some("moisture_params"),
+                contents: bytemuck::bytes_of(&params),
+                usage: wgpu::BufferUsages::UNIFORM,
+            });
+
+        let bind_group =
+            self.pipelines
+                .create_moisture_bind_group(&self.device, &self.buffers, &params_buf);
+
+        let mut encoder = self
+            .device
+            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
+                label: Some("moisture_dispatch"),
+            });
+        {
+            // Scoped so the pass is dropped (ended) before `encoder.finish()`.
+            let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
+                label: Some("moisture_wind"),
+                timestamp_writes: None,
+            });
+            pass.set_pipeline(&self.pipelines.moisture);
+            pass.set_bind_group(0, &bind_group, &[]);
+            pass.dispatch_workgroups(workgroups, 1, 1);
+        }
+        self.queue.submit(std::iter::once(encoder.finish()));
+
+        // Copy `out_moisture` into the shared staging buffer and map it on the CPU.
+        readback(
+            &self.device,
+            &self.queue,
+            &self.buffers.out_moisture,
+            &self.buffers.readback_staging,
+            tile_count,
+        )
+    }
+
+    /// Run a full climate step with GPU acceleration for the two diffusion passes.
+    ///
+    /// Steps 0–1 (orbital/aerosol forcing) and steps 3, 5–12 run on CPU.
+    /// Steps 2 (temperature) and 4 (moisture) dispatch to GPU compute shaders.
+    ///
+    /// * `physics` – climate model; supplies parameters and the per-tile
+    ///   albedo / evapotranspiration / flag caches.
+    /// * `grid` – tile state, updated in place. Must have the same tile count
+    ///   as the grid this context was initialised with.
+    /// * `turn` – forwarded to the orbital-forcing step.
+    /// * `dt` – time-step scale. Rate parameters are multiplied by `dt`;
+    ///   moisture decay is raised to the power `dt`, matching the CPU
+    ///   (`parallel`) implementation of the same pass.
+    ///
+    /// Blocks on two GPU round-trips (one per pass).
+    pub fn process_step(
+        &mut self,
+        physics: &mut ClimatePhysics,
+        grid: &mut GridState,
+        turn: u32,
+        dt: f32,
+    ) {
+        physics.rebuild_tile_cache(grid);
+        physics.apply_orbital_forcing(grid, turn);
+        physics.apply_aerosol_forcing(grid);
+
+        let solar_min = physics.get_param("solar_min", 0.05) as f32;
+        let solar_max = physics.get_param("solar_max", 0.70) as f32;
+        self.buffers.upload_core_aux_int(
+            &self.queue,
+            grid,
+            physics.tile_albedo_ref(),
+            physics.tile_evapotrans_ref(),
+            physics.tile_flags_ref(),
+        );
+        self.buffers
+            .upload_solar(&self.queue, grid.height, solar_min, solar_max);
+
+        // GPU: temperature diffusion (dt pre-multiplied into uniforms)
+        let temps = self.dispatch_temperature(physics, dt);
+        for (i, temp) in temps.into_iter().enumerate() {
+            grid.tiles[i].temperature = temp;
+        }
+
+        // CPU: lake thermal effects (sparse, only lake tiles)
+        physics.update_lake_thermal_effects(grid, dt);
+
+        // GPU: moisture wind transport.
+        // The per-step decay factor is `moisture_decay ^ dt`, so that a scaled
+        // time step decays moisture by the same amount as on the CPU path.
+        // upload_core_decayed_moisture re-uploads climate_core from the grid:
+        // `.x` carries the temperatures just produced (including the lake
+        // adjustment) and `.y` carries moisture * decay.
+        // upload_aux_int then refreshes aux/int WITHOUT touching climate_core,
+        // so the decayed moisture is preserved.
+        let decay = (physics.get_param("moisture_decay", 0.995) as f32).powf(dt);
+        self.buffers
+            .upload_core_decayed_moisture(&self.queue, grid, decay);
+        self.buffers.upload_aux_int(
+            &self.queue,
+            grid,
+            physics.tile_albedo_ref(),
+            physics.tile_evapotrans_ref(),
+            physics.tile_flags_ref(),
+        );
+        let moisture = self.dispatch_moisture(physics, dt);
+        for (i, moist) in moisture.into_iter().enumerate() {
+            grid.tiles[i].moisture = moist;
+        }
+
+        physics.step_remaining(grid, dt);
+    }
+}
+
+/// Read `tile_count` f32 values from `source` GPU buffer back to CPU via `staging`.
+/// Free function to avoid self-borrow conflicts when `source` and `staging` are fields
+/// of the same struct as the `device`/`queue`.
+///
+/// Blocks the calling thread until the staging buffer is mapped. Panics if the
+/// mapping callback is never delivered or reports an error (both are
+/// `unwrap`ped). The whole staging buffer is mapped and returned, so the
+/// result length equals `tile_count` only when `staging` is exactly
+/// `tile_count * 4` bytes.
+fn readback(
+    device: &wgpu::Device,
+    queue: &wgpu::Queue,
+    source: &wgpu::Buffer,
+    staging: &wgpu::Buffer,
+    tile_count: u32,
+) -> Vec<f32> {
+    // Four bytes per f32 result.
+    let size = (tile_count as u64) * 4;
+
+    // GPU-side copy from the storage buffer into the mappable staging buffer.
+    let mut encoder =
+        device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
+            label: Some("readback_encoder"),
+        });
+    encoder.copy_buffer_to_buffer(source, 0, staging, 0, size);
+    queue.submit(std::iter::once(encoder.finish()));
+
+    // Request a read mapping; the callback forwards the outcome over a channel.
+    let buffer_slice = staging.slice(..);
+    let (sender, receiver) = std::sync::mpsc::channel();
+    buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
+        sender.send(result).unwrap();
+    });
+    // Block until outstanding GPU work (including the mapping) has completed.
+    device.poll(wgpu::Maintain::Wait);
+    receiver.recv().unwrap().unwrap();
+
+    // Copy the mapped bytes out as f32 values, then release the mapping so the
+    // staging buffer can be reused by the next readback.
+    let data = buffer_slice.get_mapped_range();
+    let result: Vec<f32> = bytemuck::cast_slice(&data).to_vec();
+    drop(data); // the mapped view must be dropped before `unmap`
+    staging.unmap();
+    result
+}
--- a/src/simulator/crates/mc-compute/src/gpu/pipelines.rs
+++ b/src/simulator/crates/mc-compute/src/gpu/pipelines.rs
@ -0,0 +1,205 @@
+//! Compute pipeline compilation and bind group management.
+
+use super::buffers::ClimateBuffers;
+
+/// Uniform parameters for the temperature diffusion shader.
+///
+/// `#[repr(C)]` with three `f32` and one `u32`: 16 bytes, no padding. Uploaded
+/// byte-for-byte via `bytemuck::bytes_of`.
+/// NOTE(review): the field order presumably mirrors the uniform struct in
+/// temperature_diffusion.wgsl — confirm against the shader.
+#[repr(C)]
+#[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
+pub struct TempParams {
+    /// Wind heat-transport factor (the caller pre-multiplies by `dt`).
+    pub conductivity: f32,
+    /// Scale on absorbed solar energy (the caller pre-multiplies by `dt`).
+    pub energy_scale: f32,
+    /// Relaxation rate toward the row's solar value (the caller pre-multiplies by `dt`).
+    pub relaxation: f32,
+    /// Number of tiles in the per-tile buffers.
+    pub tile_count: u32,
+}
+
+/// Uniform parameters for the moisture wind shader.
+///
+/// `#[repr(C)]` with three `f32` and one `u32`: 16 bytes, no padding. Field
+/// order and types match the `Params` struct declared in `moisture_wind.wgsl`.
+#[repr(C)]
+#[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
+pub struct MoistureParams {
+    /// Wind moisture-transport factor (the caller pre-multiplies by `dt`).
+    pub transport_rate: f32,
+    /// Blocking factor for moisture arriving from an elevated upwind tile.
+    pub rain_shadow_block: f32,
+    /// Atmospheric loss factor (the caller pre-multiplies by `dt`).
+    pub atmo_loss: f32,
+    /// Number of tiles; the shader returns early for invocation ids at or above it.
+    pub tile_count: u32,
+}
+
+/// Compiled compute pipelines for climate simulation.
+///
+/// The bind group layouts are kept alongside the pipelines so that bind groups
+/// can be created per dispatch.
+pub struct ClimatePipelines {
+    /// Pipeline for the temperature diffusion shader.
+    pub temperature: wgpu::ComputePipeline,
+    /// Pipeline for the moisture wind shader.
+    pub moisture: wgpu::ComputePipeline,
+    /// Layout of the temperature bind group (bindings 0–6).
+    pub temp_bind_group_layout: wgpu::BindGroupLayout,
+    /// Layout of the moisture bind group (bindings 0–5).
+    pub moisture_bind_group_layout: wgpu::BindGroupLayout,
+}
+
+impl ClimatePipelines {
+    /// Compile both WGSL shaders and build their bind group layouts, pipeline
+    /// layouts and compute pipelines.
+    ///
+    /// The shader sources are embedded at compile time with `include_str!`.
+    /// Both pipelines use the entry point `main` and a single bind group at
+    /// index 0.
+    pub fn new(device: &wgpu::Device) -> Self {
+        let compile = |label: &'static str, wgsl: &'static str| {
+            device.create_shader_module(wgpu::ShaderModuleDescriptor {
+                label: Some(label),
+                source: wgpu::ShaderSource::Wgsl(wgsl.into()),
+            })
+        };
+        let make_pipeline_layout = |label: &'static str, bind_layout: &wgpu::BindGroupLayout| {
+            device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
+                label: Some(label),
+                bind_group_layouts: &[bind_layout],
+                push_constant_ranges: &[],
+            })
+        };
+        let make_pipeline = |label: &'static str,
+                             layout: &wgpu::PipelineLayout,
+                             module: &wgpu::ShaderModule| {
+            device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
+                label: Some(label),
+                layout: Some(layout),
+                module,
+                entry_point: Some("main"),
+                compilation_options: Default::default(),
+                cache: None,
+            })
+        };
+
+        let temp_shader = compile(
+            "temperature_diffusion",
+            include_str!("shaders/temperature_diffusion.wgsl"),
+        );
+        let moisture_shader = compile("moisture_wind", include_str!("shaders/moisture_wind.wgsl"));
+
+        // Temperature: five read-only inputs, the uniform params, one output.
+        let temp_entries = [
+            storage_ro_entry(0), // climate_core
+            storage_ro_entry(1), // climate_aux
+            storage_ro_entry(2), // climate_int
+            storage_ro_entry(3), // neighbor_lut
+            storage_ro_entry(4), // solar_row
+            uniform_entry(5),    // params
+            storage_rw_entry(6), // out_temp
+        ];
+        let temp_bind_group_layout =
+            device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+                label: Some("temp_bind_group_layout"),
+                entries: &temp_entries,
+            });
+
+        // Moisture: same inputs minus solar_row, so params/output shift down by one.
+        let moisture_entries = [
+            storage_ro_entry(0), // climate_core
+            storage_ro_entry(1), // climate_aux
+            storage_ro_entry(2), // climate_int
+            storage_ro_entry(3), // neighbor_lut
+            uniform_entry(4),    // params
+            storage_rw_entry(5), // out_moisture
+        ];
+        let moisture_bind_group_layout =
+            device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+                label: Some("moisture_bind_group_layout"),
+                entries: &moisture_entries,
+            });
+
+        let temp_pipeline_layout =
+            make_pipeline_layout("temp_pipeline_layout", &temp_bind_group_layout);
+        let moisture_pipeline_layout =
+            make_pipeline_layout("moisture_pipeline_layout", &moisture_bind_group_layout);
+
+        let temperature = make_pipeline(
+            "temperature_diffusion_pipeline",
+            &temp_pipeline_layout,
+            &temp_shader,
+        );
+        let moisture = make_pipeline(
+            "moisture_wind_pipeline",
+            &moisture_pipeline_layout,
+            &moisture_shader,
+        );
+
+        Self {
+            temperature,
+            moisture,
+            temp_bind_group_layout,
+            moisture_bind_group_layout,
+        }
+    }
+
+    /// Build the bind group for one temperature dispatch.
+    ///
+    /// Bindings 0–4 are the read-only inputs (core, aux, int, neighbor table,
+    /// solar row), binding 5 is `params_buffer`, binding 6 is `out_temp`.
+    pub fn create_temp_bind_group(
+        &self,
+        device: &wgpu::Device,
+        buffers: &ClimateBuffers,
+        params_buffer: &wgpu::Buffer,
+    ) -> wgpu::BindGroup {
+        let entries = [
+            buf_entry(0, &buffers.climate_core),
+            buf_entry(1, &buffers.climate_aux),
+            buf_entry(2, &buffers.climate_int),
+            buf_entry(3, &buffers.neighbor_lut),
+            buf_entry(4, &buffers.solar_row),
+            buf_entry(5, params_buffer),
+            buf_entry(6, &buffers.out_temp),
+        ];
+        let descriptor = wgpu::BindGroupDescriptor {
+            label: Some("temp_bind_group"),
+            layout: &self.temp_bind_group_layout,
+            entries: &entries,
+        };
+        device.create_bind_group(&descriptor)
+    }
+
+    /// Build the bind group for one moisture dispatch.
+    ///
+    /// Bindings 0–3 are the read-only inputs (core, aux, int, neighbor table),
+    /// binding 4 is `params_buffer`, binding 5 is `out_moisture`.
+    pub fn create_moisture_bind_group(
+        &self,
+        device: &wgpu::Device,
+        buffers: &ClimateBuffers,
+        params_buffer: &wgpu::Buffer,
+    ) -> wgpu::BindGroup {
+        let entries = [
+            buf_entry(0, &buffers.climate_core),
+            buf_entry(1, &buffers.climate_aux),
+            buf_entry(2, &buffers.climate_int),
+            buf_entry(3, &buffers.neighbor_lut),
+            buf_entry(4, params_buffer),
+            buf_entry(5, &buffers.out_moisture),
+        ];
+        let descriptor = wgpu::BindGroupDescriptor {
+            label: Some("moisture_bind_group"),
+            layout: &self.moisture_bind_group_layout,
+            entries: &entries,
+        };
+        device.create_bind_group(&descriptor)
+    }
+}
+
+/// Layout entry for a read-only storage buffer at `binding`, visible to the
+/// compute stage only.
+fn storage_ro_entry(binding: u32) -> wgpu::BindGroupLayoutEntry {
+    let ty = wgpu::BindingType::Buffer {
+        ty: wgpu::BufferBindingType::Storage { read_only: true },
+        has_dynamic_offset: false,
+        min_binding_size: None,
+    };
+    wgpu::BindGroupLayoutEntry {
+        binding,
+        visibility: wgpu::ShaderStages::COMPUTE,
+        ty,
+        count: None,
+    }
+}
+
+/// Layout entry for a read-write storage buffer at `binding`, visible to the
+/// compute stage only.
+fn storage_rw_entry(binding: u32) -> wgpu::BindGroupLayoutEntry {
+    let ty = wgpu::BindingType::Buffer {
+        ty: wgpu::BufferBindingType::Storage { read_only: false },
+        has_dynamic_offset: false,
+        min_binding_size: None,
+    };
+    wgpu::BindGroupLayoutEntry {
+        binding,
+        visibility: wgpu::ShaderStages::COMPUTE,
+        ty,
+        count: None,
+    }
+}
+
+/// Layout entry for a uniform buffer at `binding`, visible to the compute
+/// stage only.
+fn uniform_entry(binding: u32) -> wgpu::BindGroupLayoutEntry {
+    let ty = wgpu::BindingType::Buffer {
+        ty: wgpu::BufferBindingType::Uniform,
+        has_dynamic_offset: false,
+        min_binding_size: None,
+    };
+    wgpu::BindGroupLayoutEntry {
+        binding,
+        visibility: wgpu::ShaderStages::COMPUTE,
+        ty,
+        count: None,
+    }
+}
+
+/// Bind the whole of `buffer` at slot `binding`.
+fn buf_entry(binding: u32, buffer: &wgpu::Buffer) -> wgpu::BindGroupEntry<'_> {
+    let resource = buffer.as_entire_binding();
+    wgpu::BindGroupEntry { binding, resource }
+}
--- a/src/simulator/crates/mc-compute/src/gpu/shaders/moisture_wind.wgsl
+++ b/src/simulator/crates/mc-compute/src/gpu/shaders/moisture_wind.wgsl
@ -0,0 +1,73 @@
+// Moisture wind transport compute shader.
+// Ports ClimatePhysics::update_moisture_wind() from physics.rs.
+//
+// Each invocation processes one tile: reads decayed moisture + upwind neighbor,
+// applies wind transport with rain shadow blocking, evapotranspiration, and
+// atmospheric loss.
+
+// Uniform parameters for the moisture pass (bound at @binding(4)).
+struct Params {
+    transport_rate: f32,     // scales moisture carried in from the upwind neighbor
+    rain_shadow_block: f32,  // fraction of that transport removed when the upwind tile is elevated
+    atmo_loss: f32,          // scales the moisture * temperature loss term
+    tile_count: u32,         // invocations with gid.x >= tile_count return without writing
+};
+
+// Per-tile climate data (SoA layout) — same buffers as temperature shader
+// climate_core: vec4(temperature, moisture, wind_speed, magic_heat_delta)
+//   NOTE: moisture in .y has been pre-decayed on CPU before upload
+// climate_aux:  vec4(elevation, magic_moisture_delta, albedo, evapotranspiration)
+// climate_int:  vec4(wind_direction, tile_flags, col, row)
+
+// Bit tested against the upwind tile's tile_flags (climate_int[..].y) to
+// decide whether rain shadow blocking applies.
+const FLAG_IS_ELEVATED: u32 = 2u;  // bit 1
+
+// Bindings 0-3 are read-only inputs, 4 is the uniform parameter block, and 5
+// receives one moisture value per tile. neighbor_lut is indexed as
+// tile_index * 6 + direction.
+@group(0) @binding(0) var<storage, read>       climate_core: array<vec4<f32>>;
+@group(0) @binding(1) var<storage, read>       climate_aux:  array<vec4<f32>>;
+@group(0) @binding(2) var<storage, read>       climate_int:  array<vec4<u32>>;
+@group(0) @binding(3) var<storage, read>       neighbor_lut: array<u32>;
+@group(0) @binding(4) var<uniform>             params:       Params;
+@group(0) @binding(5) var<storage, read_write> out_moisture: array<f32>;
+
+// neighbor_lut value meaning "no neighbor in this direction"; such a lookup
+// contributes no transported moisture.
+const INVALID_NEIGHBOR: u32 = 0xFFFFFFFFu;
+
+// Entry point: one invocation per tile (64 per workgroup). Computes the tile's
+// next moisture value from the pre-decayed input and writes it, clamped to
+// [0, 1], to out_moisture[i]. All inputs are bound read-only.
+@compute @workgroup_size(64)
+fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
+    let i = gid.x;
+    // Invocations past the last tile have nothing to write.
+    if (i >= params.tile_count) {
+        return;
+    }
+
+    let core = climate_core[i];
+    let aux = climate_aux[i];
+    let ints = climate_int[i];
+
+    let current_moisture = core.y;  // pre-decayed
+    let temperature = core.x;
+    let wind_speed = core.z;
+    let moisture_delta = aux.y;
+    let evapotrans = aux.w;
+    let wind_dir = ints.x;
+    // ints.y (this tile's own flags) is not loaded here: only the upwind
+    // tile's flags are tested below.
+
+    // Upwind neighbor moisture transport: the upwind direction is the
+    // opposite hex direction, (wind_dir + 3) mod 6.
+    let upwind_dir = (wind_dir + 3u) % 6u;
+    let upwind_idx = neighbor_lut[i * 6u + upwind_dir];
+
+    var transported: f32 = 0.0;
+    if (upwind_idx != INVALID_NEIGHBOR) {
+        let upwind_flags = climate_int[upwind_idx].y;
+        // Rain shadow: an elevated upwind tile blocks a fraction of the transport.
+        var block: f32 = 0.0;
+        if ((upwind_flags & FLAG_IS_ELEVATED) != 0u) {
+            block = params.rain_shadow_block;
+        }
+        let upwind_moisture = climate_core[upwind_idx].y;
+        transported = upwind_moisture * wind_speed * params.transport_rate * (1.0 - block);
+    }
+
+    // Loss term grows with both the tile's moisture and its temperature.
+    let space_loss = current_moisture * params.atmo_loss * temperature;
+
+    out_moisture[i] = clamp(
+        current_moisture + transported + evapotrans - space_loss + moisture_delta,
+        0.0,
+        1.0
+    );
+}
--- a/src/simulator/crates/mc-compute/src/gpu/shaders/temperature_diffusion.wgsl
+++ b/src/simulator/crates/mc-compute/src/gpu/shaders/temperature_diffusion.wgsl
@ -0,0 +1,63 @@
+// Temperature diffusion compute shader.
+// Ports ClimatePhysics::update_temperatures() from physics.rs.
+//
+// Each invocation processes one tile: reads current temperature + upwind neighbor,
+// applies solar forcing, wind transport, equilibrium relaxation, and magic delta.
+
+// Uniform parameters for the temperature pass (bound at @binding(5)).
+struct Params {
+    conductivity: f32,  // scales wind transport of the upwind temperature difference
+    energy_scale: f32,  // scales absorbed solar input, solar * (1 - albedo)
+    relaxation: f32,    // rate of relaxation toward the row's solar value
+    tile_count: u32,    // invocations with gid.x >= tile_count return without writing
+};
+
+// Per-tile climate data (SoA layout)
+// climate_core: vec4(temperature, moisture, wind_speed, magic_heat_delta)
+// climate_aux:  vec4(elevation, magic_moisture_delta, albedo, evapotranspiration)
+// climate_int:  vec4(wind_direction, tile_flags, col, row)
+
+// Bindings 0-4 are read-only inputs, 5 is the uniform parameter block, and 6
+// receives one temperature value per tile. neighbor_lut is indexed as
+// tile_index * 6 + direction; solar_row is indexed by the tile's row.
+@group(0) @binding(0) var<storage, read>       climate_core: array<vec4<f32>>;
+@group(0) @binding(1) var<storage, read>       climate_aux:  array<vec4<f32>>;
+@group(0) @binding(2) var<storage, read>       climate_int:  array<vec4<u32>>;
+@group(0) @binding(3) var<storage, read>       neighbor_lut: array<u32>;
+@group(0) @binding(4) var<storage, read>       solar_row:    array<f32>;
+@group(0) @binding(5) var<uniform>             params:       Params;
+@group(0) @binding(6) var<storage, read_write> out_temp:     array<f32>;
+
+// neighbor_lut value meaning "no neighbor in this direction"; such a lookup
+// contributes no wind transport.
+const INVALID_NEIGHBOR: u32 = 0xFFFFFFFFu;
+
+// Entry point: one invocation per tile (64 per workgroup). Writes the tile's
+// next temperature, clamped to [0, 1], to out_temp[tile].
+@compute @workgroup_size(64)
+fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
+    let tile = gid.x;
+    // Invocations past the last tile have nothing to write.
+    if (tile >= params.tile_count) {
+        return;
+    }
+
+    let tile_core = climate_core[tile];
+    let tile_int = climate_int[tile];
+
+    let temp_now = tile_core.x;
+    let wind_speed = tile_core.z;
+    let magic_heat = tile_core.w;
+    let albedo = climate_aux[tile].z;
+
+    // Insolation is looked up per row (tile_int.w); albedo reflects part of it.
+    let insolation = solar_row[tile_int.w];
+    let absorbed = insolation * (1.0 - albedo) * params.energy_scale;
+
+    // Heat arrives from the tile opposite the wind direction (tile_int.x).
+    let from_dir = (tile_int.x + 3u) % 6u;
+    let from_idx = neighbor_lut[tile * 6u + from_dir];
+
+    var advected: f32 = 0.0;
+    if (from_idx != INVALID_NEIGHBOR) {
+        let from_temp = climate_core[from_idx].x;
+        advected = (from_temp - temp_now) * wind_speed * params.conductivity;
+    }
+
+    // Pull toward the row's (un-attenuated) solar value.
+    let pull = (insolation - temp_now) * params.relaxation;
+
+    let next_temp = temp_now + absorbed + advected + pull + magic_heat;
+    out_temp[tile] = clamp(next_temp, 0.0, 1.0);
+}
--- a/src/simulator/crates/mc-compute/src/lib.rs
+++ b/src/simulator/crates/mc-compute/src/lib.rs
@ -0,0 +1,99 @@
+//! mc-compute — GPU and parallel CPU acceleration for climate simulation.
+//!
+//! Wraps `mc-climate::ClimatePhysics` with optional GPU compute shaders (wgpu)
+//! and CPU parallelism (rayon). The CPU path is always available as a fallback.
+
+pub mod cpu;
+#[cfg(feature = "gpu")]
+pub mod gpu;
+#[cfg(test)]
+mod tests;
+
+use mc_climate::ClimatePhysics;
+use mc_core::grid::GridState;
+
+/// Minimum tile count before GPU dispatch is worthwhile.
+/// Below this, upload/download overhead exceeds compute savings.
+///
+/// Compiled only with the `gpu` feature: in this file it is read solely by the
+/// `Backend::Gpu` arm of `AcceleratedClimate::process_step`, so an ungated
+/// definition is dead code (and warns) in builds without that feature.
+#[cfg(feature = "gpu")]
+const GPU_TILE_THRESHOLD: usize = 8_000;
+
+/// Compute backend chosen when an `AcceleratedClimate` is constructed.
+/// Variants other than `Cpu` exist only when their cargo feature is enabled.
+enum Backend {
+    /// Single-threaded path: steps delegate to `ClimatePhysics::process_step`.
+    Cpu,
+    /// Rayon path: steps delegate to `cpu::process_step_parallel`.
+    #[cfg(feature = "parallel")]
+    CpuParallel,
+    /// GPU path; holds the context returned by `gpu::GpuContext::init`.
+    #[cfg(feature = "gpu")]
+    Gpu(gpu::GpuContext),
+}
+
+/// Accelerated climate processor. Owns a `ClimatePhysics` instance and dispatches
+/// the hot-path diffusion passes to the selected compute backend.
+pub struct AcceleratedClimate {
+    // Simulation state and parameters; every backend operates on this instance.
+    physics: ClimatePhysics,
+    // Backend fixed at construction time by `new_cpu` / `new_parallel` / `new_gpu`.
+    backend: Backend,
+}
+
+impl AcceleratedClimate {
+    /// Wrap `physics` with the single-threaded CPU backend, which needs no
+    /// optional feature.
+    pub fn new_cpu(physics: ClimatePhysics) -> Self {
+        let backend = Backend::Cpu;
+        Self { physics, backend }
+    }
+
+    /// Wrap `physics` with the rayon-based parallel CPU backend.
+    #[cfg(feature = "parallel")]
+    pub fn new_parallel(physics: ClimatePhysics) -> Self {
+        let backend = Backend::CpuParallel;
+        Self { physics, backend }
+    }
+
+    /// Wrap `physics` with the GPU backend, initialised from `grid`.
+    ///
+    /// When `gpu::GpuContext::init` yields no context, a notice is printed to
+    /// stderr and the single-threaded CPU backend is used instead.
+    #[cfg(feature = "gpu")]
+    pub fn new_gpu(physics: ClimatePhysics, grid: &GridState) -> Self {
+        if let Some(ctx) = gpu::GpuContext::init(grid) {
+            return Self { physics, backend: Backend::Gpu(ctx) };
+        }
+        eprintln!("[mc-compute] GPU unavailable, falling back to CPU");
+        Self::new_cpu(physics)
+    }
+
+    /// Advance the simulation by one climate step on the configured backend.
+    ///
+    /// `seed` reaches only the single-threaded `ClimatePhysics::process_step`
+    /// path; the parallel and GPU entry points do not take it. A GPU-backed
+    /// instance still runs the single-threaded path for grids with fewer than
+    /// `GPU_TILE_THRESHOLD` tiles.
+    pub fn process_step(&mut self, grid: &mut GridState, turn: u32, seed: u64, dt: f32) {
+        let Self { physics, backend } = self;
+        match backend {
+            Backend::Cpu => {
+                physics.process_step(grid, turn, seed, dt);
+            }
+
+            #[cfg(feature = "parallel")]
+            Backend::CpuParallel => {
+                cpu::process_step_parallel(physics, grid, turn, dt);
+            }
+
+            #[cfg(feature = "gpu")]
+            Backend::Gpu(ctx) => {
+                let too_small_for_gpu = grid.tiles.len() < GPU_TILE_THRESHOLD;
+                if too_small_for_gpu {
+                    physics.process_step(grid, turn, seed, dt);
+                } else {
+                    ctx.process_step(physics, grid, turn, dt);
+                }
+            }
+        }
+    }
+
+    /// Fill the three frame buffers (`tex_a`, `tex_b`, `tex_c`) from `grid` by
+    /// forwarding to `ClimatePhysics::write_frame_buffers`.
+    pub fn write_frame_buffers(
+        &self,
+        grid: &GridState,
+        tex_a: &mut [f32],
+        tex_b: &mut [f32],
+        tex_c: &mut [f32],
+    ) {
+        let physics = &self.physics;
+        physics.write_frame_buffers(grid, tex_a, tex_b, tex_c);
+    }
+
+    /// Run one atmospheric-chemistry pass on `grid` by forwarding to
+    /// `ClimatePhysics::step_atmospheric_chemistry`.
+    pub fn step_atmospheric_chemistry(&self, grid: &mut GridState) {
+        let physics = &self.physics;
+        physics.step_atmospheric_chemistry(grid);
+    }
+}
--- a/src/simulator/crates/mc-compute/src/tests.rs
+++ b/src/simulator/crates/mc-compute/src/tests.rs
@ -0,0 +1,327 @@
+//! Unit tests for mc-compute.
+//!
+//! Covers three areas that are independent of GPU hardware:
+//!
+//! 1. **Neighbor LUT** — `build_neighbor_lut` correctness (interior, edge, corner, upwind).
+//! 2. **Buffer packing** — `pack_core`, `pack_aux`, `pack_int` field ordering matches
+//!    what the WGSL shaders expect at each binding offset.
+//! 3. **Parallel parity** — `process_step_parallel` produces the same per-tile
+//!    temperature and moisture as the single-threaded `ClimatePhysics::process_step`.
+
+use mc_core::grid::GridState;
+
+/// Build a `w`×`h` grid in which every tile has identical climate values
+/// (temperature 0.5, moisture 0.4, elevation 0.3, wind direction 0 at speed
+/// 0.4) and the "grassland" biome.
+fn uniform_grid(w: i32, h: i32) -> GridState {
+    let mut grid = GridState::new(w, h);
+    grid.tiles.iter_mut().for_each(|tile| {
+        tile.biome_id = String::from("grassland");
+        tile.elevation = 0.3;
+        tile.temperature = 0.5;
+        tile.moisture = 0.4;
+        tile.wind_speed = 0.4;
+        tile.wind_direction = 0;
+    });
+    grid
+}
+
+/// Build a `w`×`h` grid whose per-tile values are derived from the tile index,
+/// so neighbouring tiles differ. Wind direction cycles through 0..6, wind
+/// speed is 0.3 plus up to 0.4, and both magic deltas are zero.
+fn varied_grid(w: i32, h: i32) -> GridState {
+    let mut grid = GridState::new(w, h);
+    let mut index = 0usize;
+    for tile in &mut grid.tiles {
+        let k = index as f32;
+        tile.biome_id = String::from("grassland");
+        tile.temperature = (k * 0.07).fract();
+        tile.moisture = (k * 0.11).fract();
+        tile.elevation = (k * 0.05).fract();
+        tile.wind_speed = 0.3 + (k * 0.03).fract() * 0.4;
+        tile.wind_direction = (index % 6) as i32;
+        tile.magic_heat_delta = 0.0;
+        tile.magic_moisture_delta = 0.0;
+        index += 1;
+    }
+    grid
+}
+
+/// Construct a `ClimatePhysics` from a fixed parameter set, with "[]" and "{}"
+/// passed for the constructor's other two JSON arguments.
+fn make_physics() -> mc_climate::ClimatePhysics {
+    let params_json = serde_json::json!({
+        "wind_conductivity": 0.1,
+        "energy_scale": 0.005,
+        "equilibrium_relaxation": 0.08,
+        "solar_min": 0.05,
+        "solar_max": 0.70,
+        "moisture_transport": 0.15,
+        "moisture_decay": 0.995,
+        "mountain_rain_shadow_block": 0.9,
+        "atmospheric_loss_rate": 0.0003,
+    })
+    .to_string();
+    mc_climate::ClimatePhysics::new(&params_json, "[]", "{}")
+}
+
+// ── Neighbor LUT ──────────────────────────────────────────────────────────────
+
+#[cfg(feature = "gpu")]
+mod neighbor_lut {
+    use crate::gpu::buffers::build_neighbor_lut;
+    use mc_core::algorithms::hex;
+
+    /// Value the LUT stores for a neighbor that lies outside the grid.
+    const OOB: u32 = 0xFFFF_FFFF;
+
+    /// All six LUT slots of an interior tile in a 5×5 grid hold valid,
+    /// in-range tile indices.
+    #[test]
+    fn interior_tile_has_six_valid_neighbors() {
+        let (w, h) = (5, 5);
+        let lut = build_neighbor_lut(w, h);
+        // Tile (2, 2) is interior — even column
+        let col = 2i32;
+        let row = 2i32;
+        let tile = (row * w + col) as usize;
+
+        let mut neighbors: Vec<u32> = Vec::with_capacity(6);
+        for dir in 0..6 {
+            neighbors.push(lut[tile * 6 + dir]);
+        }
+
+        assert!(
+            !neighbors.contains(&OOB),
+            "interior tile should have all 6 neighbors, got {neighbors:?}",
+        );
+        // No neighbor index may reach past the last tile.
+        let tile_total = (w * h) as u32;
+        assert!(
+            !neighbors.iter().any(|&n_idx| n_idx >= tile_total),
+            "all neighbor indices must be in-bounds",
+        );
+    }
+
+    /// The corner tile (0,0) of a 5×5 grid has at least one slot marked OOB.
+    #[test]
+    fn corner_tile_has_oob_neighbors() {
+        let (w, h) = (5, 5);
+        let lut = build_neighbor_lut(w, h);
+        let idx = 0usize; // tile (col=0, row=0)
+
+        let mut oob_count = 0;
+        for dir in 0..6 {
+            if lut[idx * 6 + dir] == OOB {
+                oob_count += 1;
+            }
+        }
+        assert!(oob_count > 0, "corner tile should have at least one OOB neighbor");
+    }
+
+    /// Every LUT slot of a 4×4 grid agrees with the `hex::ODD_Q_NEIGHBORS`
+    /// offset table, with out-of-grid neighbors stored as OOB.
+    #[test]
+    fn lut_matches_offset_neighbors_exhaustive() {
+        let (w, h) = (4, 4);
+        let lut = build_neighbor_lut(w, h);
+        // Walk tiles in row-major order; the flat index is row * w + col.
+        for flat in 0..(w * h) {
+            let col = flat % w;
+            let row = flat / w;
+            let idx = flat as usize;
+            let parity = (col & 1) as usize;
+            for dir in 0..6usize {
+                let (dc, dr) = hex::ODD_Q_NEIGHBORS[parity][dir];
+                let nc = col + dc;
+                let nr = row + dr;
+                let inside = (0..w).contains(&nc) && (0..h).contains(&nr);
+                let expected = if inside { (nr * w + nc) as u32 } else { OOB };
+                assert_eq!(
+                    lut[idx * 6 + dir],
+                    expected,
+                    "LUT mismatch at ({col},{row}) dir={dir}",
+                );
+            }
+        }
+    }
+
+    /// Looking up slot `(wind_dir + 3) % 6` — the rule the shaders use — gives
+    /// the same tile as `hex::upwind_offset`.
+    #[test]
+    fn upwind_direction_is_opposite_of_wind() {
+        let (w, h) = (5, 5);
+        let lut = build_neighbor_lut(w, h);
+        // Interior tile (2, 2), wind direction 0 — upwind slot is direction 3
+        let col = 2i32;
+        let row = 2i32;
+        let wind_dir = 0usize;
+        let opposite = (wind_dir + 3) % 6;
+        let tile = (row * w + col) as usize;
+        let from_lut = lut[tile * 6 + opposite];
+        assert_ne!(from_lut, OOB, "upwind neighbor must be in bounds");
+
+        // Cross-check against hex::upwind_offset
+        let (up_col, up_row) = hex::upwind_offset(col, row, wind_dir as i32, w, h).unwrap();
+        let from_hex = (up_row * w + up_col) as u32;
+        assert_eq!(from_lut, from_hex, "LUT upwind index must match hex::upwind_offset");
+    }
+}
+
+// ── Buffer packing ────────────────────────────────────────────────────────────
+
+#[cfg(feature = "gpu")]
+mod buffer_packing {
+    use super::*;
+    use crate::gpu::buffers::{pack_aux, pack_core, pack_int};
+
+    /// Core buffer layout per tile: temperature, moisture, wind_speed,
+    /// magic_heat_delta (a decay factor of 1.0 leaves moisture untouched).
+    #[test]
+    fn pack_core_field_order() {
+        let mut grid = GridState::new(1, 1);
+        {
+            let tile = &mut grid.tiles[0];
+            tile.temperature = 0.25;
+            tile.moisture = 0.50;
+            tile.wind_speed = 0.75;
+            tile.magic_heat_delta = 0.10;
+        }
+
+        let packed = pack_core(&grid, 1.0);
+        assert_eq!(packed.len(), 4);
+        assert_eq!(packed[0], 0.25, "index 0 = temperature");
+        assert_eq!(packed[1], 0.50, "index 1 = moisture");
+        assert_eq!(packed[2], 0.75, "index 2 = wind_speed");
+        assert_eq!(packed[3], 0.10, "index 3 = magic_heat_delta");
+    }
+
+    /// The decay factor passed to `pack_core` scales moisture (index 1) and
+    /// leaves temperature and wind speed as they were.
+    #[test]
+    fn pack_core_decay_applies_to_moisture_only() {
+        let mut grid = GridState::new(1, 1);
+        {
+            let tile = &mut grid.tiles[0];
+            tile.temperature = 0.6;
+            tile.moisture = 0.4;
+            tile.wind_speed = 0.5;
+        }
+
+        let decay = 0.995f32;
+        let packed = pack_core(&grid, decay);
+        let expected_moisture = 0.4 * decay;
+        assert_eq!(packed[0], 0.6, "temperature unchanged");
+        assert!((packed[1] - expected_moisture).abs() < 1e-6, "moisture decayed");
+        assert_eq!(packed[2], 0.5, "wind_speed unchanged");
+    }
+
+    /// Aux buffer layout per tile: elevation, magic_moisture_delta, albedo,
+    /// evapotranspiration.
+    #[test]
+    fn pack_aux_field_order() {
+        let mut grid = GridState::new(1, 1);
+        {
+            let tile = &mut grid.tiles[0];
+            tile.elevation = 0.30;
+            tile.magic_moisture_delta = 0.05;
+        }
+
+        let albedo = [0.40f32];
+        let evapotrans = [0.15f32];
+
+        let packed = pack_aux(&grid, &albedo, &evapotrans);
+        assert_eq!(packed.len(), 4);
+        assert_eq!(packed[0], 0.30, "index 0 = elevation");
+        assert_eq!(packed[1], 0.05, "index 1 = magic_moisture_delta");
+        assert_eq!(packed[2], 0.40, "index 2 = albedo");
+        assert_eq!(packed[3], 0.15, "index 3 = evapotranspiration");
+    }
+
+    /// Int buffer layout per tile: wind_direction, tile_flags, col, row.
+    #[test]
+    fn pack_int_field_order() {
+        let mut grid = GridState::new(3, 4);
+        // Tile at col=2, row=3 sits at flat index 3*3+2 = 11.
+        let idx = 11usize;
+        {
+            let tile = &mut grid.tiles[idx];
+            tile.wind_direction = 4;
+            tile.col = 2;
+            tile.row = 3;
+        }
+
+        let mut flags = vec![0u8; grid.tiles.len()];
+        flags[idx] = 0b0000_0011; // FLAG_IS_WATER | FLAG_IS_ELEVATED
+
+        let packed = pack_int(&grid, &flags);
+        let base = idx * 4;
+        assert_eq!(packed[base], 4, "index 0 = wind_direction");
+        assert_eq!(packed[base + 1], 0b11, "index 1 = tile_flags");
+        assert_eq!(packed[base + 2], 2, "index 2 = col");
+        assert_eq!(packed[base + 3], 3, "index 3 = row");
+    }
+
+    /// Packing a 2×3 grid yields 6×4 values, tile `i` starting at offset `i * 4`.
+    #[test]
+    fn pack_core_stride() {
+        let grid = uniform_grid(2, 3);
+        let tile_count = 6usize;
+        let packed = pack_core(&grid, 1.0);
+        assert_eq!(packed.len(), tile_count * 4);
+        // uniform_grid gives every tile temperature=0.5, moisture=0.4
+        for i in 0..tile_count {
+            let base = i * 4;
+            assert_eq!(packed[base], 0.5, "tile {i} temperature");
+            assert_eq!(packed[base + 1], 0.4, "tile {i} moisture");
+        }
+    }
+}
+
+// ── Parallel parity ───────────────────────────────────────────────────────────
+
+#[cfg(feature = "parallel")]
+mod parallel_parity {
+    use super::*;
+    use crate::cpu::process_step_parallel;
+
+    /// Largest absolute per-tile difference accepted between two runs.
+    const TOLERANCE: f32 = 1e-5;
+
+    /// True when `a` and `b` differ by at most `TOLERANCE`.
+    fn close(a: f32, b: f32) -> bool {
+        (a - b).abs() <= TOLERANCE
+    }
+
+    /// Advance two identical copies of `varied_grid(8, 8)` by one step
+    /// (turn 1, dt 1.0), each with its own fresh physics instance: the first
+    /// through `ClimatePhysics::process_step` with seed 0, the second through
+    /// `process_step_parallel`. Returns `(single_threaded, parallel)`.
+    fn step_both_paths() -> (GridState, GridState) {
+        let mut grid_st = varied_grid(8, 8);
+        let mut grid_par = grid_st.clone();
+
+        let mut physics_st = make_physics();
+        let mut physics_par = make_physics();
+
+        physics_st.process_step(&mut grid_st, 1, 0, 1.0);
+        process_step_parallel(&mut physics_par, &mut grid_par, 1, 1.0);
+
+        (grid_st, grid_par)
+    }
+
+    /// After one step, parallel and single-threaded paths produce the same
+    /// temperature for every tile on a varied 8×8 grid.
+    #[test]
+    fn temperature_matches_single_threaded() {
+        let (grid_st, grid_par) = step_both_paths();
+
+        for (i, (st, par)) in grid_st.tiles.iter().zip(grid_par.tiles.iter()).enumerate() {
+            assert!(
+                close(st.temperature, par.temperature),
+                "tile {i}: ST temperature={} PAR temperature={}",
+                st.temperature,
+                par.temperature,
+            );
+        }
+    }
+
+    /// After one step, parallel and single-threaded paths produce the same
+    /// moisture for every tile on a varied 8×8 grid.
+    #[test]
+    fn moisture_matches_single_threaded() {
+        let (grid_st, grid_par) = step_both_paths();
+
+        for (i, (st, par)) in grid_st.tiles.iter().zip(grid_par.tiles.iter()).enumerate() {
+            assert!(
+                close(st.moisture, par.moisture),
+                "tile {i}: ST moisture={} PAR moisture={}",
+                st.moisture,
+                par.moisture,
+            );
+        }
+    }
+
+    /// Two independent parallel runs from identical inputs — each with its own
+    /// fresh physics instance and 6×6 grid — agree on temperature and moisture
+    /// within `TOLERANCE`.
+    #[test]
+    fn parallel_is_deterministic() {
+        let mut grid_a = varied_grid(6, 6);
+        let mut grid_b = varied_grid(6, 6);
+
+        let mut physics_a = make_physics();
+        let mut physics_b = make_physics();
+
+        process_step_parallel(&mut physics_a, &mut grid_a, 5, 1.0);
+        process_step_parallel(&mut physics_b, &mut grid_b, 5, 1.0);
+
+        for (i, (a, b)) in grid_a.tiles.iter().zip(grid_b.tiles.iter()).enumerate() {
+            assert!(
+                close(a.temperature, b.temperature),
+                "tile {i}: run A temperature={} run B temperature={}",
+                a.temperature,
+                b.temperature,
+            );
+            assert!(
+                close(a.moisture, b.moisture),
+                "tile {i}: run A moisture={} run B moisture={}",
+                a.moisture,
+                b.moisture,
+            );
+        }
+    }
+}