From a1c72d97e782b756aac7e6906cd0b6452c48de72 Mon Sep 17 00:00:00 2001 From: Claude Code Date: Tue, 31 Mar 2026 22:47:33 -0700 Subject: [PATCH] =?UTF-8?q?deps-upgrade(mc-compute):=20=E2=AC=86=EF=B8=8F?= =?UTF-8?q?=20Pin=20or=20upgrade=20mc-compute=20dependency=20for=20perform?= =?UTF-8?q?ance/stability=20improvements?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Lilith Autocommit --- src/simulator/crates/mc-compute/Cargo.toml | 22 ++ src/simulator/crates/mc-compute/src/cpu.rs | 129 +++++++ .../crates/mc-compute/src/gpu/buffers.rs | 257 ++++++++++++++ .../crates/mc-compute/src/gpu/mod.rs | 256 ++++++++++++++ .../crates/mc-compute/src/gpu/pipelines.rs | 205 +++++++++++ .../src/gpu/shaders/moisture_wind.wgsl | 73 ++++ .../gpu/shaders/temperature_diffusion.wgsl | 63 ++++ src/simulator/crates/mc-compute/src/lib.rs | 99 ++++++ src/simulator/crates/mc-compute/src/tests.rs | 327 ++++++++++++++++++ 9 files changed, 1431 insertions(+) create mode 100644 src/simulator/crates/mc-compute/Cargo.toml create mode 100644 src/simulator/crates/mc-compute/src/cpu.rs create mode 100644 src/simulator/crates/mc-compute/src/gpu/buffers.rs create mode 100644 src/simulator/crates/mc-compute/src/gpu/mod.rs create mode 100644 src/simulator/crates/mc-compute/src/gpu/pipelines.rs create mode 100644 src/simulator/crates/mc-compute/src/gpu/shaders/moisture_wind.wgsl create mode 100644 src/simulator/crates/mc-compute/src/gpu/shaders/temperature_diffusion.wgsl create mode 100644 src/simulator/crates/mc-compute/src/lib.rs create mode 100644 src/simulator/crates/mc-compute/src/tests.rs diff --git a/src/simulator/crates/mc-compute/Cargo.toml b/src/simulator/crates/mc-compute/Cargo.toml new file mode 100644 index 00000000..f69ebce1 --- /dev/null +++ b/src/simulator/crates/mc-compute/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "mc-compute" +version = "0.1.0" +edition = "2021" + +[features] +default = ["cpu"] +cpu = [] +gpu = ["dep:wgpu", 
"dep:pollster", "dep:bytemuck"] +parallel = ["dep:rayon"] + +[dependencies] +mc-core = { path = "../mc-core" } +mc-climate = { path = "../mc-climate" } + +wgpu = { version = "24", optional = true } +pollster = { version = "0.4", optional = true } +bytemuck = { version = "1", features = ["derive"], optional = true } +rayon = { version = "1.10", optional = true } + +[dev-dependencies] +serde_json = "1" diff --git a/src/simulator/crates/mc-compute/src/cpu.rs b/src/simulator/crates/mc-compute/src/cpu.rs new file mode 100644 index 00000000..ada9b43d --- /dev/null +++ b/src/simulator/crates/mc-compute/src/cpu.rs @@ -0,0 +1,129 @@ +//! CPU-parallel climate processing via rayon. +//! +//! The two hot-path diffusion loops (temperature, moisture) are extracted as +//! focused functions using `par_iter` from rayon. All other steps delegate to +//! the existing single-threaded `ClimatePhysics` methods. + +#[cfg(feature = "parallel")] +use mc_climate::{ClimatePhysics, FLAG_IS_ELEVATED}; +#[cfg(feature = "parallel")] +use mc_core::grid::GridState; + +/// Run a full climate step with rayon parallelism on the diffusion passes. +/// +/// Steps that are inherently sequential (river transport, lake chains, terrain +/// evolution) stay single-threaded. Only the two O(n) stencil passes are parallelised. +#[cfg(feature = "parallel")] +pub fn process_step_parallel(physics: &mut ClimatePhysics, grid: &mut GridState, turn: u32, dt: f32) { + physics.rebuild_tile_cache(grid); + physics.apply_orbital_forcing(grid, turn); + physics.apply_aerosol_forcing(grid); + temperature_diffusion_parallel(physics, grid, dt); + physics.update_lake_thermal_effects(grid, dt); + moisture_diffusion_parallel(physics, grid, dt); + physics.step_remaining(grid, dt); +} + +/// Parallel temperature diffusion — ports `ClimatePhysics::update_temperatures`. +/// +/// Reads from a snapshot of `buf_a` (old temperatures), writes results back +/// to `grid.tiles[i].temperature`. 
The double-buffer pattern means every tile
+/// reads old state, so all invocations are independent.
+#[cfg(feature = "parallel")]
+fn temperature_diffusion_parallel(physics: &ClimatePhysics, grid: &mut GridState, dt: f32) {
+    use mc_core::algorithms::hex;
+    use rayon::prelude::*;
+
+    let conductivity = (physics.get_param("wind_conductivity", 0.1) as f32 * dt).min(0.9);
+    let energy_scale = physics.get_param("energy_scale", 0.005) as f32 * dt;
+    let relaxation = (physics.get_param("equilibrium_relaxation", 0.08) as f32 * dt).min(0.9);
+    let solar_min = physics.get_param("solar_min", 0.05) as f32;
+    let solar_max = physics.get_param("solar_max", 0.70) as f32;
+
+    let w = grid.width;
+    let h = grid.height;
+    let solar_by_row: Vec<f32> = (0..h)
+        .map(|row| hex::solar_by_row(row, h, solar_min, solar_max))
+        .collect();
+
+    // Snapshot old temperatures — independent reads in parallel body
+    let old_temps: Vec<f32> = grid.tiles.iter().map(|t| t.temperature).collect();
+    let tile_albedo = physics.tile_albedo_ref();
+
+    let new_temps: Vec<f32> = (0..grid.tiles.len())
+        .into_par_iter()
+        .map(|i| {
+            let tile = &grid.tiles[i];
+            let solar = solar_by_row[tile.row.clamp(0, h - 1) as usize];
+            let current = old_temps[i];
+            let net_solar = solar * (1.0 - tile_albedo[i]) * energy_scale;
+
+            let mut wind_transport = 0.0f32;
+            if let Some((uc, ur)) =
+                hex::upwind_offset(tile.col, tile.row, tile.wind_direction, w, h)
+            {
+                let upwind_idx = (ur * w + uc) as usize;
+                wind_transport = (old_temps[upwind_idx] - current) * tile.wind_speed * conductivity;
+            }
+
+            let relax = (solar - current) * relaxation;
+            (current + net_solar + wind_transport + relax + tile.magic_heat_delta).clamp(0.0, 1.0)
+        })
+        .collect();
+
+    for (i, temp) in new_temps.into_iter().enumerate() {
+        grid.tiles[i].temperature = temp;
+    }
+}
+
+/// Parallel moisture wind transport — ports `ClimatePhysics::update_moisture_wind`.
+///
+/// Reads from a snapshot of decayed moisture; writes results back to
+/// `grid.tiles[i].moisture`. All invocations are independent.
+#[cfg(feature = "parallel")]
+fn moisture_diffusion_parallel(physics: &ClimatePhysics, grid: &mut GridState, dt: f32) {
+    use mc_core::algorithms::hex;
+    use rayon::prelude::*;
+
+    let transport_rate = (physics.get_param("moisture_transport", 0.15) as f32 * dt).min(0.9);
+    let decay = (physics.get_param("moisture_decay", 0.995) as f32).powf(dt);
+    let rain_shadow_block = physics.get_param("mountain_rain_shadow_block", 0.9) as f32;
+    let atmo_loss = (physics.get_param("atmospheric_loss_rate", 0.0003) as f32 * dt).min(0.9);
+
+    let w = grid.width;
+    let h = grid.height;
+
+    // Snapshot decayed moisture — independent reads in parallel body
+    let old_moisture: Vec<f32> = grid.tiles.iter().map(|t| t.moisture * decay).collect();
+    let tile_flags = physics.tile_flags_ref();
+    let tile_evapotrans = physics.tile_evapotrans_ref();
+
+    let new_moisture: Vec<f32> = (0..grid.tiles.len())
+        .into_par_iter()
+        .map(|i| {
+            let tile = &grid.tiles[i];
+            let current = old_moisture[i];
+
+            let mut transported = 0.0f32;
+            if let Some((uc, ur)) =
+                hex::upwind_offset(tile.col, tile.row, tile.wind_direction, w, h)
+            {
+                let upwind_idx = (ur * w + uc) as usize;
+                let block = if tile_flags[upwind_idx] & FLAG_IS_ELEVATED != 0 {
+                    rain_shadow_block
+                } else {
+                    0.0
+                };
+                transported = old_moisture[upwind_idx] * tile.wind_speed * transport_rate * (1.0 - block);
+            }
+
+            let space_loss = current * atmo_loss * tile.temperature;
+            (current + transported + tile_evapotrans[i] - space_loss + tile.magic_moisture_delta)
+                .clamp(0.0, 1.0)
+        })
+        .collect();
+
+    for (i, moist) in new_moisture.into_iter().enumerate() {
+        grid.tiles[i].moisture = moist;
+    }
+}
diff --git a/src/simulator/crates/mc-compute/src/gpu/buffers.rs b/src/simulator/crates/mc-compute/src/gpu/buffers.rs
new file mode 100644
index 00000000..a1a2f320
--- /dev/null
+++ 
b/src/simulator/crates/mc-compute/src/gpu/buffers.rs
@@ -0,0 +1,257 @@
+//! GPU buffer layout and data transfer for climate compute shaders.
+//!
+//! Uses Structure-of-Arrays (SoA) layout — only the ~15 fields needed by the
+//! hot-path shaders are uploaded, not all 134 TileState fields.
+
+use mc_core::algorithms::hex;
+use mc_core::grid::GridState;
+use wgpu::util::DeviceExt;
+
+/// Precomputed neighbor lookup table: `n * 6` entries where entry `[i * 6 + dir]`
+/// is the flat index of tile `i`'s neighbor in direction `dir`, or `0xFFFFFFFF`
+/// if that neighbor is out of bounds.
+///
+/// Built once at map creation. Eliminates all hex coordinate math in shaders.
+pub fn build_neighbor_lut(width: i32, height: i32) -> Vec<u32> {
+    let n = (width * height) as usize;
+    let mut lut = vec![0xFFFF_FFFFu32; n * 6];
+    for row in 0..height {
+        for col in 0..width {
+            let idx = (row * width + col) as usize;
+            let parity = (col & 1) as usize;
+            for dir in 0..6 {
+                let (dc, dr) = hex::ODD_Q_NEIGHBORS[parity][dir];
+                let nc = col + dc;
+                let nr = row + dr;
+                if nc >= 0 && nc < width && nr >= 0 && nr < height {
+                    lut[idx * 6 + dir] = (nr * width + nc) as u32;
+                }
+            }
+        }
+    }
+    lut
+}
+
+/// GPU buffers for climate simulation.
+pub struct ClimateBuffers {
+    pub(crate) climate_core: wgpu::Buffer,
+    pub(crate) climate_aux: wgpu::Buffer,
+    pub(crate) climate_int: wgpu::Buffer,
+    pub(crate) neighbor_lut: wgpu::Buffer,
+    pub(crate) solar_row: wgpu::Buffer,
+    pub(crate) out_temp: wgpu::Buffer,
+    pub(crate) out_moisture: wgpu::Buffer,
+    /// Staging buffer for reading results back to CPU.
+    pub(crate) readback_staging: wgpu::Buffer,
+    pub(crate) tile_count: u32,
+}
+
+impl ClimateBuffers {
+    /// Allocate GPU buffers sized for the given grid. Neighbor LUT is uploaded
+    /// once and never changes.
+ pub fn new(device: &wgpu::Device, grid: &GridState) -> Self { + let n = grid.tiles.len(); + let tile_count = n as u32; + let height = grid.height as u32; + + // Neighbor LUT — static, uploaded once + let lut_data = build_neighbor_lut(grid.width, grid.height); + let neighbor_lut = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { + label: Some("neighbor_lut"), + contents: bytemuck::cast_slice(&lut_data), + usage: wgpu::BufferUsages::STORAGE, + }); + + let vec4f_size = (n * 16) as u64; // vec4 = 16 bytes + let vec4u_size = (n * 16) as u64; // vec4 = 16 bytes + let f32_size = (n * 4) as u64; + + let climate_core = device.create_buffer(&wgpu::BufferDescriptor { + label: Some("climate_core"), + size: vec4f_size, + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + + let climate_aux = device.create_buffer(&wgpu::BufferDescriptor { + label: Some("climate_aux"), + size: vec4f_size, + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + + let climate_int = device.create_buffer(&wgpu::BufferDescriptor { + label: Some("climate_int"), + size: vec4u_size, + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + + let solar_row = device.create_buffer(&wgpu::BufferDescriptor { + label: Some("solar_row"), + size: (height as u64) * 4, + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + + let out_temp = device.create_buffer(&wgpu::BufferDescriptor { + label: Some("out_temp"), + size: f32_size, + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC, + mapped_at_creation: false, + }); + + let out_moisture = device.create_buffer(&wgpu::BufferDescriptor { + label: Some("out_moisture"), + size: f32_size, + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC, + mapped_at_creation: false, + }); + + let readback_staging = 
device.create_buffer(&wgpu::BufferDescriptor { + label: Some("readback_staging"), + size: f32_size, + usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + + Self { + climate_core, + climate_aux, + climate_int, + neighbor_lut, + solar_row, + out_temp, + out_moisture, + readback_staging, + tile_count, + } + } + + /// Upload all three per-tile buffers (core, aux, int) from the current grid state. + /// Use before the temperature shader where no prior partial upload exists. + pub fn upload_core_aux_int( + &self, + queue: &wgpu::Queue, + grid: &GridState, + tile_albedo: &[f32], + tile_evapotrans: &[f32], + tile_flags: &[u8], + ) { + queue.write_buffer(&self.climate_core, 0, bytemuck::cast_slice(&pack_core(grid, 1.0))); + queue.write_buffer( + &self.climate_aux, + 0, + bytemuck::cast_slice(&pack_aux(grid, tile_albedo, tile_evapotrans)), + ); + queue.write_buffer( + &self.climate_int, + 0, + bytemuck::cast_slice(&pack_int(grid, tile_flags)), + ); + } + + /// Upload only `climate_core`: vec4(temperature, moisture, wind_speed, magic_heat_delta). + /// Moisture is written undecayed (factor 1.0), replacing any pre-decayed values in the buffer. + pub fn upload_core(&self, queue: &wgpu::Queue, grid: &GridState) { + queue.write_buffer(&self.climate_core, 0, bytemuck::cast_slice(&pack_core(grid, 1.0))); + } + + /// Upload `climate_core` with moisture pre-decayed by `decay`. + /// Used before the moisture shader to snapshot `moisture * decay`. + pub fn upload_core_decayed_moisture( + &self, + queue: &wgpu::Queue, + grid: &GridState, + decay: f32, + ) { + queue.write_buffer( + &self.climate_core, + 0, + bytemuck::cast_slice(&pack_core(grid, decay)), + ); + } + + /// Upload `climate_aux` and `climate_int` without touching `climate_core`. + /// Use after `upload_core_decayed_moisture` to refresh aux/int data while keeping the + /// decayed moisture in `climate_core`; neither aux nor int carries temperature. 
+    pub fn upload_aux_int(
+        &self,
+        queue: &wgpu::Queue,
+        grid: &GridState,
+        tile_albedo: &[f32],
+        tile_evapotrans: &[f32],
+        tile_flags: &[u8],
+    ) {
+        queue.write_buffer(
+            &self.climate_aux,
+            0,
+            bytemuck::cast_slice(&pack_aux(grid, tile_albedo, tile_evapotrans)),
+        );
+        queue.write_buffer(
+            &self.climate_int,
+            0,
+            bytemuck::cast_slice(&pack_int(grid, tile_flags)),
+        );
+    }
+
+    /// Upload solar-by-row data. Called once per step before the temperature shader.
+    pub fn upload_solar(
+        &self,
+        queue: &wgpu::Queue,
+        height: i32,
+        solar_min: f32,
+        solar_max: f32,
+    ) {
+        let solar: Vec<f32> = (0..height)
+            .map(|row| hex::solar_by_row(row, height, solar_min, solar_max))
+            .collect();
+        queue.write_buffer(&self.solar_row, 0, bytemuck::cast_slice(&solar));
+    }
+}
+
+// ── Pure packing functions (testable without GPU) ─────────────────────────────
+//
+// Each returns a flat Vec in the exact field order expected by the WGSL shaders.
+// Correctness of these functions is the correctness of the GPU data path.
+
+/// Pack `climate_core`: n × vec4<f32>(temperature, moisture * moisture_decay, wind_speed, magic_heat_delta).
+/// Pass `moisture_decay = 1.0` for the temperature shader (no decay needed).
+pub(crate) fn pack_core(grid: &GridState, moisture_decay: f32) -> Vec<f32> {
+    let mut data = Vec::with_capacity(grid.tiles.len() * 4);
+    for tile in &grid.tiles {
+        data.push(tile.temperature);
+        data.push(tile.moisture * moisture_decay);
+        data.push(tile.wind_speed);
+        data.push(tile.magic_heat_delta);
+    }
+    data
+}
+
+/// Pack `climate_aux`: n × vec4<f32>(elevation, magic_moisture_delta, albedo, evapotranspiration).
+pub(crate) fn pack_aux(
+    grid: &GridState,
+    tile_albedo: &[f32],
+    tile_evapotrans: &[f32],
+) -> Vec<f32> {
+    let mut data = Vec::with_capacity(grid.tiles.len() * 4);
+    for (i, tile) in grid.tiles.iter().enumerate() {
+        data.push(tile.elevation);
+        data.push(tile.magic_moisture_delta);
+        data.push(tile_albedo[i]);
+        data.push(tile_evapotrans[i]);
+    }
+    data
+}
+
+/// Pack `climate_int`: n × vec4<u32>(wind_direction, tile_flags, col, row).
+pub(crate) fn pack_int(grid: &GridState, tile_flags: &[u8]) -> Vec<u32> {
+    let mut data = Vec::with_capacity(grid.tiles.len() * 4);
+    for (i, tile) in grid.tiles.iter().enumerate() {
+        data.push(tile.wind_direction as u32);
+        data.push(tile_flags[i] as u32);
+        data.push(tile.col as u32);
+        data.push(tile.row as u32);
+    }
+    data
+}
diff --git a/src/simulator/crates/mc-compute/src/gpu/mod.rs b/src/simulator/crates/mc-compute/src/gpu/mod.rs
new file mode 100644
index 00000000..f3fa8670
--- /dev/null
+++ b/src/simulator/crates/mc-compute/src/gpu/mod.rs
@@ -0,0 +1,256 @@
+//! GPU compute backend via wgpu.
+//!
+//! Handles device initialization, buffer management, and shader dispatch for
+//! the two hot-path climate diffusion passes (temperature + moisture).
+
+pub mod buffers;
+pub mod pipelines;
+
+use buffers::ClimateBuffers;
+use mc_climate::ClimatePhysics;
+use mc_core::grid::GridState;
+use pipelines::{ClimatePipelines, MoistureParams, TempParams};
+use wgpu::util::DeviceExt;
+
+/// GPU context: device, queue, compiled pipelines, and allocated buffers.
+pub struct GpuContext {
+    device: wgpu::Device,
+    queue: wgpu::Queue,
+    pipelines: ClimatePipelines,
+    buffers: ClimateBuffers,
+}
+
+impl GpuContext {
+    /// Attempt to initialize GPU compute. Returns `None` if no suitable adapter
+    /// is found (e.g. headless server, unsupported platform).
+    pub fn init(grid: &GridState) -> Option<Self> {
+        let instance = wgpu::Instance::new(&wgpu::InstanceDescriptor {
+            backends: wgpu::Backends::VULKAN
+                | wgpu::Backends::METAL
+                | wgpu::Backends::DX12
+                | wgpu::Backends::BROWSER_WEBGPU,
+            ..Default::default()
+        });
+
+        let adapter = pollster::block_on(instance.request_adapter(&wgpu::RequestAdapterOptions {
+            power_preference: wgpu::PowerPreference::HighPerformance,
+            compatible_surface: None,
+            force_fallback_adapter: false,
+        }))?;
+
+        let (device, queue) = pollster::block_on(adapter.request_device(
+            &wgpu::DeviceDescriptor {
+                label: Some("mc-compute"),
+                required_features: wgpu::Features::empty(),
+                required_limits: wgpu::Limits::default(),
+                memory_hints: Default::default(),
+            },
+            None,
+        ))
+        .ok()?;
+
+        let pipelines = ClimatePipelines::new(&device);
+        let buffers = ClimateBuffers::new(&device, grid);
+
+        Some(Self {
+            device,
+            queue,
+            pipelines,
+            buffers,
+        })
+    }
+
+    /// Dispatch the temperature diffusion compute shader and return per-tile results.
+    ///
+    /// Pre-conditions: `buffers.upload_core_aux_int` and `buffers.upload_solar` must
+    /// have been called with current grid state.
+    fn dispatch_temperature(&mut self, physics: &ClimatePhysics, dt: f32) -> Vec<f32> {
+        let tile_count = self.buffers.tile_count;
+        let workgroups = (tile_count + 63) / 64;
+
+        let params = TempParams {
+            conductivity: (physics.get_param("wind_conductivity", 0.1) as f32 * dt).min(0.9),
+            energy_scale: physics.get_param("energy_scale", 0.005) as f32 * dt,
+            relaxation: (physics.get_param("equilibrium_relaxation", 0.08) as f32 * dt).min(0.9),
+            tile_count,
+        };
+        let params_buf = self
+            .device
+            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
+                label: Some("temp_params"),
+                contents: bytemuck::bytes_of(&params),
+                usage: wgpu::BufferUsages::UNIFORM,
+            });
+
+        let bind_group =
+            self.pipelines
+                .create_temp_bind_group(&self.device, &self.buffers, &params_buf);
+
+        let mut encoder = self
+            .device
+            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
+                label: Some("temp_dispatch"),
+            });
+        {
+            let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
+                label: Some("temperature_diffusion"),
+                timestamp_writes: None,
+            });
+            pass.set_pipeline(&self.pipelines.temperature);
+            pass.set_bind_group(0, &bind_group, &[]);
+            pass.dispatch_workgroups(workgroups, 1, 1);
+        }
+        self.queue.submit(std::iter::once(encoder.finish()));
+
+        readback(
+            &self.device,
+            &self.queue,
+            &self.buffers.out_temp,
+            &self.buffers.readback_staging,
+            tile_count,
+        )
+    }
+
+    /// Dispatch the moisture wind compute shader and return per-tile results.
+    ///
+    /// Pre-conditions: `buffers.upload_core_decayed_moisture` and
+    /// `buffers.upload_aux_int` must have been called with current grid state.
+    fn dispatch_moisture(&mut self, physics: &ClimatePhysics, dt: f32) -> Vec<f32> {
+        let tile_count = self.buffers.tile_count;
+        let workgroups = (tile_count + 63) / 64;
+
+        let params = MoistureParams {
+            transport_rate: (physics.get_param("moisture_transport", 0.15) as f32 * dt).min(0.9),
+            rain_shadow_block: physics.get_param("mountain_rain_shadow_block", 0.9) as f32,
+            atmo_loss: (physics.get_param("atmospheric_loss_rate", 0.0003) as f32 * dt).min(0.9),
+            tile_count,
+        };
+        let params_buf = self
+            .device
+            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
+                label: Some("moisture_params"),
+                contents: bytemuck::bytes_of(&params),
+                usage: wgpu::BufferUsages::UNIFORM,
+            });
+
+        let bind_group =
+            self.pipelines
+                .create_moisture_bind_group(&self.device, &self.buffers, &params_buf);
+
+        let mut encoder = self
+            .device
+            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
+                label: Some("moisture_dispatch"),
+            });
+        {
+            let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
+                label: Some("moisture_wind"),
+                timestamp_writes: None,
+            });
+            pass.set_pipeline(&self.pipelines.moisture);
+            pass.set_bind_group(0, &bind_group, &[]);
+            pass.dispatch_workgroups(workgroups, 1, 1);
+        }
+        self.queue.submit(std::iter::once(encoder.finish()));
+
+        readback(
+            &self.device,
+            &self.queue,
+            &self.buffers.out_moisture,
+            &self.buffers.readback_staging,
+            tile_count,
+        )
+    }
+
+    /// Run a full climate step with GPU acceleration for the two diffusion passes.
+    ///
+    /// Steps 0–1 (orbital/aerosol forcing) and steps 3, 5–12 run on CPU.
+    /// Steps 2 (temperature) and 4 (moisture) dispatch to GPU compute shaders.
+    pub fn process_step(
+        &mut self,
+        physics: &mut ClimatePhysics,
+        grid: &mut GridState,
+        turn: u32,
+        dt: f32,
+    ) {
+        physics.rebuild_tile_cache(grid);
+        physics.apply_orbital_forcing(grid, turn);
+        physics.apply_aerosol_forcing(grid);
+
+        let solar_min = physics.get_param("solar_min", 0.05) as f32;
+        let solar_max = physics.get_param("solar_max", 0.70) as f32;
+        self.buffers.upload_core_aux_int(
+            &self.queue,
+            grid,
+            physics.tile_albedo_ref(),
+            physics.tile_evapotrans_ref(),
+            physics.tile_flags_ref(),
+        );
+        self.buffers
+            .upload_solar(&self.queue, grid.height, solar_min, solar_max);
+
+        // GPU: temperature diffusion (dt pre-multiplied into uniforms)
+        let temps = self.dispatch_temperature(physics, dt);
+        for (i, temp) in temps.into_iter().enumerate() {
+            grid.tiles[i].temperature = temp;
+        }
+
+        // CPU: lake thermal effects (sparse, only lake tiles)
+        physics.update_lake_thermal_effects(grid, dt);
+
+        // GPU: moisture wind transport.
+        // upload_core_decayed_moisture sets climate_core.y = moisture * decay, where decay is the
+        // per-step factor raised to `dt` (matching the rayon path in cpu.rs for any dt).
+        // upload_aux_int then refreshes aux/int WITHOUT touching climate_core.
+        let decay = (physics.get_param("moisture_decay", 0.995) as f32).powf(dt);
+        self.buffers
+            .upload_core_decayed_moisture(&self.queue, grid, decay);
+        self.buffers.upload_aux_int(
+            &self.queue,
+            grid,
+            physics.tile_albedo_ref(),
+            physics.tile_evapotrans_ref(),
+            physics.tile_flags_ref(),
+        );
+        let moisture = self.dispatch_moisture(physics, dt);
+        for (i, moist) in moisture.into_iter().enumerate() {
+            grid.tiles[i].moisture = moist;
+        }
+
+        physics.step_remaining(grid, dt);
+    }
+}
+
+/// Read `tile_count` f32 values from `source` GPU buffer back to CPU via `staging`.
+/// Free function to avoid self-borrow conflicts when `source` and `staging` are fields
+/// of the same struct as the `device`/`queue`.
+fn readback(
+    device: &wgpu::Device,
+    queue: &wgpu::Queue,
+    source: &wgpu::Buffer,
+    staging: &wgpu::Buffer,
+    tile_count: u32,
+) -> Vec<f32> {
+    let size = (tile_count as u64) * 4;
+
+    let mut encoder =
+        device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
+            label: Some("readback_encoder"),
+        });
+    encoder.copy_buffer_to_buffer(source, 0, staging, 0, size);
+    queue.submit(std::iter::once(encoder.finish()));
+
+    let buffer_slice = staging.slice(..);
+    let (sender, receiver) = std::sync::mpsc::channel();
+    buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
+        sender.send(result).unwrap();
+    });
+    device.poll(wgpu::Maintain::Wait);
+    receiver.recv().unwrap().unwrap();
+
+    let data = buffer_slice.get_mapped_range();
+    let result: Vec<f32> = bytemuck::cast_slice(&data).to_vec();
+    drop(data);
+    staging.unmap();
+    result
+}
diff --git a/src/simulator/crates/mc-compute/src/gpu/pipelines.rs b/src/simulator/crates/mc-compute/src/gpu/pipelines.rs
new file mode 100644
index 00000000..e7013fa1
--- /dev/null
+++ b/src/simulator/crates/mc-compute/src/gpu/pipelines.rs
@@ -0,0 +1,205 @@
+//! Compute pipeline compilation and bind group management.
+
+use super::buffers::ClimateBuffers;
+
+/// Uniform parameters for the temperature diffusion shader.
+#[repr(C)]
+#[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
+pub struct TempParams {
+    pub conductivity: f32,
+    pub energy_scale: f32,
+    pub relaxation: f32,
+    pub tile_count: u32,
+}
+
+/// Uniform parameters for the moisture wind shader.
+#[repr(C)]
+#[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
+pub struct MoistureParams {
+    pub transport_rate: f32,
+    pub rain_shadow_block: f32,
+    pub atmo_loss: f32,
+    pub tile_count: u32,
+}
+
+/// Compiled compute pipelines for climate simulation.
+pub struct ClimatePipelines { + pub temperature: wgpu::ComputePipeline, + pub moisture: wgpu::ComputePipeline, + pub temp_bind_group_layout: wgpu::BindGroupLayout, + pub moisture_bind_group_layout: wgpu::BindGroupLayout, +} + +impl ClimatePipelines { + pub fn new(device: &wgpu::Device) -> Self { + // Temperature diffusion shader + let temp_shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: Some("temperature_diffusion"), + source: wgpu::ShaderSource::Wgsl( + include_str!("shaders/temperature_diffusion.wgsl").into(), + ), + }); + + // Moisture wind shader + let moisture_shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: Some("moisture_wind"), + source: wgpu::ShaderSource::Wgsl(include_str!("shaders/moisture_wind.wgsl").into()), + }); + + // Temperature bind group layout: 7 bindings + let temp_bind_group_layout = + device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("temp_bind_group_layout"), + entries: &[ + storage_ro_entry(0), // climate_core + storage_ro_entry(1), // climate_aux + storage_ro_entry(2), // climate_int + storage_ro_entry(3), // neighbor_lut + storage_ro_entry(4), // solar_row + uniform_entry(5), // params + storage_rw_entry(6), // out_temp + ], + }); + + // Moisture bind group layout: 6 bindings + let moisture_bind_group_layout = + device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("moisture_bind_group_layout"), + entries: &[ + storage_ro_entry(0), // climate_core + storage_ro_entry(1), // climate_aux + storage_ro_entry(2), // climate_int + storage_ro_entry(3), // neighbor_lut + uniform_entry(4), // params + storage_rw_entry(5), // out_moisture + ], + }); + + let temp_pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: Some("temp_pipeline_layout"), + bind_group_layouts: &[&temp_bind_group_layout], + push_constant_ranges: &[], + }); + + let moisture_pipeline_layout = + 
device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: Some("moisture_pipeline_layout"), + bind_group_layouts: &[&moisture_bind_group_layout], + push_constant_ranges: &[], + }); + + let temperature = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: Some("temperature_diffusion_pipeline"), + layout: Some(&temp_pipeline_layout), + module: &temp_shader, + entry_point: Some("main"), + compilation_options: Default::default(), + cache: None, + }); + + let moisture = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: Some("moisture_wind_pipeline"), + layout: Some(&moisture_pipeline_layout), + module: &moisture_shader, + entry_point: Some("main"), + compilation_options: Default::default(), + cache: None, + }); + + Self { + temperature, + moisture, + temp_bind_group_layout, + moisture_bind_group_layout, + } + } + + /// Create a bind group for the temperature shader dispatch. + pub fn create_temp_bind_group( + &self, + device: &wgpu::Device, + buffers: &ClimateBuffers, + params_buffer: &wgpu::Buffer, + ) -> wgpu::BindGroup { + device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("temp_bind_group"), + layout: &self.temp_bind_group_layout, + entries: &[ + buf_entry(0, &buffers.climate_core), + buf_entry(1, &buffers.climate_aux), + buf_entry(2, &buffers.climate_int), + buf_entry(3, &buffers.neighbor_lut), + buf_entry(4, &buffers.solar_row), + buf_entry(5, params_buffer), + buf_entry(6, &buffers.out_temp), + ], + }) + } + + /// Create a bind group for the moisture shader dispatch. 
+ pub fn create_moisture_bind_group( + &self, + device: &wgpu::Device, + buffers: &ClimateBuffers, + params_buffer: &wgpu::Buffer, + ) -> wgpu::BindGroup { + device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("moisture_bind_group"), + layout: &self.moisture_bind_group_layout, + entries: &[ + buf_entry(0, &buffers.climate_core), + buf_entry(1, &buffers.climate_aux), + buf_entry(2, &buffers.climate_int), + buf_entry(3, &buffers.neighbor_lut), + buf_entry(4, params_buffer), + buf_entry(5, &buffers.out_moisture), + ], + }) + } +} + +fn storage_ro_entry(binding: u32) -> wgpu::BindGroupLayoutEntry { + wgpu::BindGroupLayoutEntry { + binding, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: true }, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + } +} + +fn storage_rw_entry(binding: u32) -> wgpu::BindGroupLayoutEntry { + wgpu::BindGroupLayoutEntry { + binding, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + } +} + +fn uniform_entry(binding: u32) -> wgpu::BindGroupLayoutEntry { + wgpu::BindGroupLayoutEntry { + binding, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + } +} + +fn buf_entry(binding: u32, buffer: &wgpu::Buffer) -> wgpu::BindGroupEntry<'_> { + wgpu::BindGroupEntry { + binding, + resource: buffer.as_entire_binding(), + } +} diff --git a/src/simulator/crates/mc-compute/src/gpu/shaders/moisture_wind.wgsl b/src/simulator/crates/mc-compute/src/gpu/shaders/moisture_wind.wgsl new file mode 100644 index 00000000..5580bf36 --- /dev/null +++ b/src/simulator/crates/mc-compute/src/gpu/shaders/moisture_wind.wgsl @@ -0,0 +1,73 @@ +// Moisture wind 
transport compute shader.
+// Ports ClimatePhysics::update_moisture_wind() from physics.rs.
+//
+// Each invocation processes one tile: reads decayed moisture + upwind neighbor,
+// applies wind transport with rain shadow blocking, evapotranspiration, and
+// atmospheric loss.
+
+struct Params {
+    transport_rate: f32,
+    rain_shadow_block: f32,
+    atmo_loss: f32,
+    tile_count: u32,
+};
+
+// Per-tile climate data (SoA layout) — same buffers as temperature shader
+// climate_core: vec4<f32>(temperature, moisture, wind_speed, magic_heat_delta)
+// NOTE: moisture in .y has been pre-decayed on CPU before upload
+// climate_aux: vec4<f32>(elevation, magic_moisture_delta, albedo, evapotranspiration)
+// climate_int: vec4<u32>(wind_direction, tile_flags, col, row)
+
+const FLAG_IS_ELEVATED: u32 = 2u; // bit 1
+
+@group(0) @binding(0) var<storage, read> climate_core: array<vec4<f32>>;
+@group(0) @binding(1) var<storage, read> climate_aux: array<vec4<f32>>;
+@group(0) @binding(2) var<storage, read> climate_int: array<vec4<u32>>;
+@group(0) @binding(3) var<storage, read> neighbor_lut: array<u32>;
+@group(0) @binding(4) var<uniform> params: Params;
+@group(0) @binding(5) var<storage, read_write> out_moisture: array<f32>;
+
+const INVALID_NEIGHBOR: u32 = 0xFFFFFFFFu;
+
+@compute @workgroup_size(64)
+fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
+    let i = gid.x;
+    if (i >= params.tile_count) {
+        return;
+    }
+
+    let core = climate_core[i];
+    let aux = climate_aux[i];
+    let ints = climate_int[i];
+
+    let current_moisture = core.y; // pre-decayed
+    let temperature = core.x;
+    let wind_speed = core.z;
+    let moisture_delta = aux.y;
+    let evapotrans = aux.w;
+    let wind_dir = ints.x;
+    let flags = ints.y;
+
+    // Upwind neighbor moisture transport
+    let upwind_dir = (wind_dir + 3u) % 6u;
+    let upwind_idx = neighbor_lut[i * 6u + upwind_dir];
+
+    var transported: f32 = 0.0;
+    if (upwind_idx != INVALID_NEIGHBOR) {
+        let upwind_flags = climate_int[upwind_idx].y;
+        var block: f32 = 0.0;
+        if ((upwind_flags & FLAG_IS_ELEVATED) != 0u) {
+            block = params.rain_shadow_block;
+        }
+        let upwind_moisture = climate_core[upwind_idx].y;
+        
transported = upwind_moisture * wind_speed * params.transport_rate * (1.0 - block);
+    }
+
+    let space_loss = current_moisture * params.atmo_loss * temperature;
+
+    out_moisture[i] = clamp(
+        current_moisture + transported + evapotrans - space_loss + moisture_delta,
+        0.0,
+        1.0
+    );
+}
diff --git a/src/simulator/crates/mc-compute/src/gpu/shaders/temperature_diffusion.wgsl b/src/simulator/crates/mc-compute/src/gpu/shaders/temperature_diffusion.wgsl
new file mode 100644
index 00000000..8f08f628
--- /dev/null
+++ b/src/simulator/crates/mc-compute/src/gpu/shaders/temperature_diffusion.wgsl
@@ -0,0 +1,63 @@
+// Temperature diffusion compute shader.
+// Ports ClimatePhysics::update_temperatures() from physics.rs.
+//
+// Each invocation processes one tile: reads current temperature + upwind neighbor,
+// applies solar forcing, wind transport, equilibrium relaxation, and magic delta.
+
+struct Params {
+    conductivity: f32,
+    energy_scale: f32,
+    relaxation: f32,
+    tile_count: u32,
+};
+
+// Per-tile climate data (SoA layout)
+// climate_core: vec4<f32>(temperature, moisture, wind_speed, magic_heat_delta)
+// climate_aux: vec4<f32>(elevation, magic_moisture_delta, albedo, evapotranspiration)
+// climate_int: vec4<u32>(wind_direction, tile_flags, col, row)
+
+@group(0) @binding(0) var<storage, read> climate_core: array<vec4<f32>>;
+@group(0) @binding(1) var<storage, read> climate_aux: array<vec4<f32>>;
+@group(0) @binding(2) var<storage, read> climate_int: array<vec4<u32>>;
+@group(0) @binding(3) var<storage, read> neighbor_lut: array<u32>;
+@group(0) @binding(4) var<storage, read> solar_row: array<f32>;
+@group(0) @binding(5) var<uniform> params: Params;
+@group(0) @binding(6) var<storage, read_write> out_temp: array<f32>;
+
+const INVALID_NEIGHBOR: u32 = 0xFFFFFFFFu;
+
+@compute @workgroup_size(64)
+fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
+    let i = gid.x;
+    if (i >= params.tile_count) {
+        return;
+    }
+
+    let core = climate_core[i];
+    let aux = climate_aux[i];
+    let ints = climate_int[i];
+
+    let current_temp = core.x;
+    let wind_speed = core.z;
+    let heat_delta = core.w;
+    let albedo = aux.z;
+    let wind_dir = ints.x;
+    
let row = ints.w; + + // Solar insolation for this row + let solar = solar_row[row]; + let net_solar = solar * (1.0 - albedo) * params.energy_scale; + + // Upwind neighbor: direction + 3 mod 6 + let upwind_dir = (wind_dir + 3u) % 6u; + let upwind_idx = neighbor_lut[i * 6u + upwind_dir]; + + var wind_transport: f32 = 0.0; + if (upwind_idx != INVALID_NEIGHBOR) { + let upwind_temp = climate_core[upwind_idx].x; + wind_transport = (upwind_temp - current_temp) * wind_speed * params.conductivity; + } + + let relax = (solar - current_temp) * params.relaxation; + out_temp[i] = clamp(current_temp + net_solar + wind_transport + relax + heat_delta, 0.0, 1.0); +} diff --git a/src/simulator/crates/mc-compute/src/lib.rs b/src/simulator/crates/mc-compute/src/lib.rs new file mode 100644 index 00000000..c817f492 --- /dev/null +++ b/src/simulator/crates/mc-compute/src/lib.rs @@ -0,0 +1,99 @@ +//! mc-compute — GPU and parallel CPU acceleration for climate simulation. +//! +//! Wraps `mc-climate::ClimatePhysics` with optional GPU compute shaders (wgpu) +//! and CPU parallelism (rayon). The CPU path is always available as a fallback. + +pub mod cpu; +#[cfg(feature = "gpu")] +pub mod gpu; +#[cfg(test)] +mod tests; + +use mc_climate::ClimatePhysics; +use mc_core::grid::GridState; + +/// Minimum tile count before GPU dispatch is worthwhile. +/// Below this, upload/download overhead exceeds compute savings. +const GPU_TILE_THRESHOLD: usize = 8_000; + +enum Backend { + Cpu, + #[cfg(feature = "parallel")] + CpuParallel, + #[cfg(feature = "gpu")] + Gpu(gpu::GpuContext), +} + +/// Accelerated climate processor. Owns a `ClimatePhysics` instance and dispatches +/// the hot-path diffusion passes to the selected compute backend. +pub struct AcceleratedClimate { + physics: ClimatePhysics, + backend: Backend, +} + +impl AcceleratedClimate { + /// Create with single-threaded CPU backend (always available, zero overhead). 
+    pub fn new_cpu(physics: ClimatePhysics) -> Self {
+        Self { physics, backend: Backend::Cpu }
+    }
+
+    /// Create with the rayon-parallel CPU backend (`Backend::CpuParallel`).
+    #[cfg(feature = "parallel")]
+    pub fn new_parallel(physics: ClimatePhysics) -> Self {
+        Self { physics, backend: Backend::CpuParallel }
+    }
+
+    /// Create with the GPU backend; if `GpuContext::init` returns `None`, logs to stderr and falls back to `new_cpu`.
+    #[cfg(feature = "gpu")]
+    pub fn new_gpu(physics: ClimatePhysics, grid: &GridState) -> Self {
+        match gpu::GpuContext::init(grid) {
+            Some(ctx) => Self { physics, backend: Backend::Gpu(ctx) },
+            None => {
+                eprintln!("[mc-compute] GPU unavailable, falling back to CPU");
+                Self::new_cpu(physics)
+            }
+        }
+    }
+
+    /// Run the full climate step on the selected backend. `seed` is forwarded only to
+    /// `ClimatePhysics::process_step` (the `Cpu` backend, and the `Gpu` backend when the grid
+    /// has fewer than `GPU_TILE_THRESHOLD` tiles); the other paths do not receive it yet.
+    pub fn process_step(&mut self, grid: &mut GridState, turn: u32, seed: u64, dt: f32) {
+        match &mut self.backend {
+            Backend::Cpu => {
+                self.physics.process_step(grid, turn, seed, dt);
+            }
+
+            #[cfg(feature = "parallel")]
+            Backend::CpuParallel => {
+                cpu::process_step_parallel(&mut self.physics, grid, turn, dt);
+            }
+
+            #[cfg(feature = "gpu")]
+            Backend::Gpu(ctx) => {
+                if grid.tiles.len() >= GPU_TILE_THRESHOLD {
+                    ctx.process_step(&mut self.physics, grid, turn, dt);
+                } else {
+                    self.physics.process_step(grid, turn, seed, dt); // small grid: stay on the CPU path
+                }
+            }
+        }
+    }
+
+    /// Pack per-tile data into the three Float32Array frame buffers used by the
+    /// web guide renderer. Delegates to `ClimatePhysics::write_frame_buffers`.
+    pub fn write_frame_buffers(
+        &self,
+        grid: &GridState,
+        tex_a: &mut [f32],
+        tex_b: &mut [f32],
+        tex_c: &mut [f32],
+    ) {
+        self.physics.write_frame_buffers(grid, tex_a, tex_b, tex_c);
+    }
+
+    /// Run atmospheric chemistry. Delegates to `ClimatePhysics::step_atmospheric_chemistry`.
+    pub fn step_atmospheric_chemistry(&self, grid: &mut GridState) {
+        self.physics.step_atmospheric_chemistry(grid);
+    }
+}
diff --git a/src/simulator/crates/mc-compute/src/tests.rs b/src/simulator/crates/mc-compute/src/tests.rs
new file mode 100644
index 00000000..4791cf55
--- /dev/null
+++ b/src/simulator/crates/mc-compute/src/tests.rs
@@ -0,0 +1,334 @@
+//! Unit tests for mc-compute.
+//!
+//! Covers three areas that are independent of GPU hardware:
+//!
+//! 1. **Neighbor LUT** — `build_neighbor_lut` correctness (interior, edge, corner, upwind).
+//! 2. **Buffer packing** — `pack_core`, `pack_aux`, `pack_int` field ordering matches
+//!    what the WGSL shaders expect at each binding offset.
+//! 3. **Parallel parity** — `process_step_parallel` produces the same per-tile
+//!    temperature and moisture as the single-threaded `ClimatePhysics::process_step`.
+
+#[cfg(any(feature = "gpu", feature = "parallel"))]
+use mc_core::grid::GridState;
+
+/// Grid of `w`×`h` tiles that all share the same climate values.
+#[cfg(feature = "gpu")]
+fn uniform_grid(w: i32, h: i32) -> GridState {
+    let mut grid = GridState::new(w, h);
+    for tile in &mut grid.tiles {
+        tile.temperature = 0.5;
+        tile.moisture = 0.4;
+        tile.elevation = 0.3;
+        tile.biome_id = "grassland".to_string();
+        tile.wind_direction = 0;
+        tile.wind_speed = 0.4;
+    }
+    grid
+}
+
+/// Grid of `w`×`h` tiles whose climate values vary deterministically with the tile index.
+#[cfg(feature = "parallel")]
+fn varied_grid(w: i32, h: i32) -> GridState {
+    let mut grid = GridState::new(w, h);
+    for (i, tile) in grid.tiles.iter_mut().enumerate() {
+        tile.temperature = (i as f32 * 0.07).fract();
+        tile.moisture = (i as f32 * 0.11).fract();
+        tile.elevation = (i as f32 * 0.05).fract();
+        tile.biome_id = "grassland".to_string();
+        tile.wind_direction = (i % 6) as i32;
+        tile.wind_speed = 0.3 + (i as f32 * 0.03).fract() * 0.4;
+        tile.magic_heat_delta = 0.0;
+        tile.magic_moisture_delta = 0.0;
+    }
+    grid
+}
+
+/// Builds a `ClimatePhysics` from a fixed JSON parameter set (used by the parity tests).
+#[cfg(feature = "parallel")]
+fn make_physics() -> mc_climate::ClimatePhysics {
+    let params = serde_json::json!({
+        "wind_conductivity": 0.1,
+        "energy_scale": 0.005,
+        "equilibrium_relaxation": 0.08,
+        "solar_min": 0.05,
+        "solar_max": 0.70,
+        "moisture_transport": 0.15,
+        "moisture_decay": 0.995,
+        "mountain_rain_shadow_block": 0.9,
+        "atmospheric_loss_rate": 0.0003,
+    });
+    mc_climate::ClimatePhysics::new(&params.to_string(), "[]", "{}")
+}
+
+// ── Neighbor LUT ──────────────────────────────────────────────────────────────
+
+#[cfg(feature = "gpu")]
+mod neighbor_lut {
+    use crate::gpu::buffers::build_neighbor_lut;
+    use mc_core::algorithms::hex;
+
+    const OOB: u32 = 0xFFFF_FFFF;
+
+    /// Interior tile of a 5×5 grid should resolve all 6 neighbors.
+    #[test]
+    fn interior_tile_has_six_valid_neighbors() {
+        let (w, h) = (5, 5);
+        let lut = build_neighbor_lut(w, h);
+        // Tile (2, 2) is interior — even column
+        let col = 2i32;
+        let row = 2i32;
+        let idx = (row * w + col) as usize;
+        let neighbors: Vec<u32> = (0..6).map(|dir| lut[idx * 6 + dir]).collect();
+        assert!(
+            neighbors.iter().all(|&n| n != OOB),
+            "interior tile should have all 6 neighbors, got {neighbors:?}",
+        );
+        // Each neighbor index must be within range
+        let n = (w * h) as u32;
+        assert!(
+            neighbors.iter().all(|&n_idx| n_idx < n),
+            "all neighbor indices must be in-bounds",
+        );
+    }
+
+    /// Corner tile (0,0) of a 5×5 grid has fewer than 6 valid neighbors.
+    #[test]
+    fn corner_tile_has_oob_neighbors() {
+        let (w, h) = (5, 5);
+        let lut = build_neighbor_lut(w, h);
+        let idx = 0usize; // tile (col=0, row=0)
+        let oob_count = (0..6).filter(|&dir| lut[idx * 6 + dir] == OOB).count();
+        assert!(oob_count > 0, "corner tile should have at least one OOB neighbor");
+    }
+
+    /// LUT entries match the `hex::ODD_Q_NEIGHBORS` offset table for every tile in a 4×4 grid.
+    #[test]
+    fn lut_matches_offset_neighbors_exhaustive() {
+        let (w, h) = (4, 4);
+        let lut = build_neighbor_lut(w, h);
+        for row in 0..h {
+            for col in 0..w {
+                let idx = (row * w + col) as usize;
+                let parity = (col & 1) as usize; // even/odd column selects the offset table row
+                for dir in 0..6usize {
+                    let (dc, dr) = hex::ODD_Q_NEIGHBORS[parity][dir];
+                    let nc = col + dc;
+                    let nr = row + dr;
+                    let expected = if nc >= 0 && nc < w && nr >= 0 && nr < h {
+                        (nr * w + nc) as u32
+                    } else {
+                        OOB
+                    };
+                    assert_eq!(
+                        lut[idx * 6 + dir],
+                        expected,
+                        "LUT mismatch at ({col},{row}) dir={dir}",
+                    );
+                }
+            }
+        }
+    }
+
+    /// The upwind direction used in shaders — `(wind_dir + 3) % 6` — selects, via the
+    /// LUT, the same tile that `hex::upwind_offset` reports for that wind direction.
+    #[test]
+    fn upwind_direction_is_opposite_of_wind() {
+        let (w, h) = (5, 5);
+        let lut = build_neighbor_lut(w, h);
+        // Interior tile (2, 2), wind blowing East (dir=0) — upwind is West (dir=3)
+        let col = 2i32;
+        let row = 2i32;
+        let wind_dir = 0usize;
+        let upwind_dir = (wind_dir + 3) % 6;
+        let idx = (row * w + col) as usize;
+        let upwind_idx = lut[idx * 6 + upwind_dir];
+        assert_ne!(upwind_idx, OOB, "upwind neighbor must be in bounds");
+
+        // Cross-check against `hex::upwind_offset` for the same tile and wind direction
+        let (expected_uc, expected_ur) =
+            hex::upwind_offset(col, row, wind_dir as i32, w, h).unwrap();
+        let expected_idx = (expected_ur * w + expected_uc) as u32;
+        assert_eq!(upwind_idx, expected_idx, "LUT upwind index must match hex::upwind_offset");
+    }
+}
+
+// ── Buffer packing ────────────────────────────────────────────────────────────
+
+#[cfg(feature = "gpu")]
+mod buffer_packing {
+    use super::*;
+    use crate::gpu::buffers::{pack_aux, pack_core, pack_int};
+
+    /// `pack_core` field order: [temperature, moisture, wind_speed, magic_heat_delta].
+    #[test]
+    fn pack_core_field_order() {
+        let mut grid = GridState::new(1, 1);
+        grid.tiles[0].temperature = 0.25;
+        grid.tiles[0].moisture = 0.50;
+        grid.tiles[0].wind_speed = 0.75;
+        grid.tiles[0].magic_heat_delta = 0.10;
+
+        let data = pack_core(&grid, 1.0); // decay factor 1.0, so moisture is expected back unchanged
+        assert_eq!(data.len(), 4);
+        assert_eq!(data[0], 0.25, "index 0 = temperature");
+        assert_eq!(data[1], 0.50, "index 1 = moisture");
+        assert_eq!(data[2], 0.75, "index 2 = wind_speed");
+        assert_eq!(data[3], 0.10, "index 3 = magic_heat_delta");
+    }
+
+    /// `pack_core` with decay scales moisture (index 1); temperature and wind_speed are unchanged.
+    #[test]
+    fn pack_core_decay_applies_to_moisture_only() {
+        let mut grid = GridState::new(1, 1);
+        grid.tiles[0].temperature = 0.6;
+        grid.tiles[0].moisture = 0.4;
+        grid.tiles[0].wind_speed = 0.5;
+
+        let decay = 0.995f32;
+        let data = pack_core(&grid, decay);
+        assert_eq!(data[0], 0.6, "temperature unchanged");
+        assert!((data[1] - 0.4 * decay).abs() < 1e-6, "moisture decayed");
+        assert_eq!(data[2], 0.5, "wind_speed unchanged");
+    }
+
+    /// `pack_aux` field order: [elevation, magic_moisture_delta, albedo, evapotranspiration].
+    #[test]
+    fn pack_aux_field_order() {
+        let mut grid = GridState::new(1, 1);
+        grid.tiles[0].elevation = 0.30;
+        grid.tiles[0].magic_moisture_delta = 0.05;
+
+        let albedo = [0.40f32]; // one entry per tile (1×1 grid)
+        let evapotrans = [0.15f32];
+
+        let data = pack_aux(&grid, &albedo, &evapotrans);
+        assert_eq!(data.len(), 4);
+        assert_eq!(data[0], 0.30, "index 0 = elevation");
+        assert_eq!(data[1], 0.05, "index 1 = magic_moisture_delta");
+        assert_eq!(data[2], 0.40, "index 2 = albedo");
+        assert_eq!(data[3], 0.15, "index 3 = evapotranspiration");
+    }
+
+    /// `pack_int` field order: [wind_direction, tile_flags, col, row].
+    #[test]
+    fn pack_int_field_order() {
+        let mut grid = GridState::new(3, 4);
+        // Pick tile at col=2, row=3 (idx = 3*3+2 = 11)
+        let tile = &mut grid.tiles[11];
+        tile.wind_direction = 4;
+        tile.col = 2;
+        tile.row = 3;
+
+        let mut flags = vec![0u8; grid.tiles.len()];
+        flags[11] = 0b0000_0011; // FLAG_IS_WATER | FLAG_IS_ELEVATED
+
+        let data = pack_int(&grid, &flags);
+        let base = 11 * 4; // four packed values per tile
+        assert_eq!(data[base], 4, "index 0 = wind_direction");
+        assert_eq!(data[base + 1], 0b11, "index 1 = tile_flags");
+        assert_eq!(data[base + 2], 2, "index 2 = col");
+        assert_eq!(data[base + 3], 3, "index 3 = row");
+    }
+
+    /// Multi-tile pack produces n×4 entries with correct stride.
+    #[test]
+    fn pack_core_stride() {
+        let n = 6usize;
+        let grid = uniform_grid(2, 3);
+        let data = pack_core(&grid, 1.0);
+        assert_eq!(data.len(), n * 4);
+        // Every tile in uniform_grid has temperature=0.5, moisture=0.4
+        for i in 0..n {
+            assert_eq!(data[i * 4], 0.5, "tile {i} temperature");
+            assert_eq!(data[i * 4 + 1], 0.4, "tile {i} moisture");
+        }
+    }
+}
+
+// ── Parallel parity ───────────────────────────────────────────────────────────
+
+#[cfg(feature = "parallel")]
+mod parallel_parity {
+    use super::*;
+    use crate::cpu::process_step_parallel;
+
+    const TOLERANCE: f32 = 1e-5;
+
+    fn close(a: f32, b: f32) -> bool {
+        (a - b).abs() <= TOLERANCE
+    }
+
+    /// After one step, parallel and single-threaded paths produce the same
+    /// temperature for every tile on a varied 8×8 grid.
+    #[test]
+    fn temperature_matches_single_threaded() {
+        let mut grid_st = varied_grid(8, 8);
+        let mut grid_par = grid_st.clone();
+
+        let mut physics_st = make_physics();
+        let mut physics_par = make_physics();
+
+        physics_st.process_step(&mut grid_st, 1, 0, 1.0); // turn=1, seed=0, dt=1.0
+        process_step_parallel(&mut physics_par, &mut grid_par, 1, 1.0); // turn=1, dt=1.0
+
+        for (i, (st, par)) in grid_st.tiles.iter().zip(grid_par.tiles.iter()).enumerate() {
+            assert!(
+                close(st.temperature, par.temperature),
+                "tile {i}: ST temperature={} PAR temperature={}",
+                st.temperature,
+                par.temperature,
+            );
+        }
+    }
+
+    /// After one step, parallel and single-threaded paths produce the same
+    /// moisture for every tile on a varied 8×8 grid.
+    #[test]
+    fn moisture_matches_single_threaded() {
+        let mut grid_st = varied_grid(8, 8);
+        let mut grid_par = grid_st.clone();
+
+        let mut physics_st = make_physics();
+        let mut physics_par = make_physics();
+
+        physics_st.process_step(&mut grid_st, 1, 0, 1.0); // turn=1, seed=0, dt=1.0
+        process_step_parallel(&mut physics_par, &mut grid_par, 1, 1.0); // turn=1, dt=1.0
+
+        for (i, (st, par)) in grid_st.tiles.iter().zip(grid_par.tiles.iter()).enumerate() {
+            assert!(
+                close(st.moisture, par.moisture),
+                "tile {i}: ST moisture={} PAR moisture={}",
+                st.moisture,
+                par.moisture,
+            );
+        }
+    }
+
+    /// Parallel result is reproducible: two identical, independently built runs agree within tolerance.
+    #[test]
+    fn parallel_is_deterministic() {
+        let mut grid_a = varied_grid(6, 6); // two separately built but identical inputs
+        let mut grid_b = varied_grid(6, 6);
+
+        let mut physics_a = make_physics(); // one fresh physics instance per run
+        let mut physics_b = make_physics();
+
+        process_step_parallel(&mut physics_a, &mut grid_a, 5, 1.0); // turn=5, dt=1.0
+        process_step_parallel(&mut physics_b, &mut grid_b, 5, 1.0);
+
+        for (i, (a, b)) in grid_a.tiles.iter().zip(grid_b.tiles.iter()).enumerate() {
+            assert!(
+                close(a.temperature, b.temperature), // compared within TOLERANCE, not bit-exact
+                "tile {i}: run A temperature={} run B temperature={}",
+                a.temperature,
+                b.temperature,
+            );
+            assert!(
+                close(a.moisture, b.moisture),
+                "tile {i}: run A moisture={} run B moisture={}",
+                a.moisture,
+                b.moisture,
+            );
+        }
+    }
+}