diff --git a/src/simulator/.clippy.toml b/src/simulator/.clippy.toml new file mode 100644 index 00000000..89d58345 --- /dev/null +++ b/src/simulator/.clippy.toml @@ -0,0 +1,6 @@ +disallowed-methods = [ + { path = "rand::thread_rng", reason = "non-deterministic — use mc_mapgen::seed::derive + WorldgenRng" }, + { path = "rand::rngs::StdRng::seed_from_u64", reason = "StdRng is not stable across rand versions" }, + { path = "rand::rngs::SmallRng::seed_from_u64", reason = "SmallRng is not stable across rand versions" }, + { path = "rand::rngs::StdRng::from_entropy", reason = "non-deterministic — use mc_mapgen::seed::derive" }, +] diff --git a/src/simulator/Cargo.lock b/src/simulator/Cargo.lock index cb09e635..3043770d 100644 --- a/src/simulator/Cargo.lock +++ b/src/simulator/Cargo.lock @@ -924,6 +924,7 @@ dependencies = [ "mc-turn", "serde", "serde_json", + "siphasher", ] [[package]] @@ -1504,6 +1505,12 @@ dependencies = [ "libc", ] +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + [[package]] name = "slab" version = "0.4.12" diff --git a/src/simulator/Cargo.toml b/src/simulator/Cargo.toml index 942186f6..de396264 100644 --- a/src/simulator/Cargo.toml +++ b/src/simulator/Cargo.toml @@ -36,6 +36,7 @@ serde = { version = "1", features = ["derive"] } serde_json = "1" getrandom = "0.2" rand = "0.9" +siphasher = "0.3" # worldgen seed derivation — "0.3" is a caret range, the exact version is held by Cargo.lock; see mc-mapgen/RNG.md # Workspace-wide lint configuration — Python-styled Rust. # See ~/.claude/instructions/rust-code-standards.md §12 for rationale. 
diff --git a/src/simulator/crates/mc-mapgen/Cargo.toml b/src/simulator/crates/mc-mapgen/Cargo.toml index 76ea072d..dd41c4ad 100644 --- a/src/simulator/crates/mc-mapgen/Cargo.toml +++ b/src/simulator/crates/mc-mapgen/Cargo.toml @@ -9,6 +9,7 @@ mc-turn = { path = "../mc-turn" } serde.workspace = true serde_json.workspace = true getrandom.workspace = true +siphasher.workspace = true [lints] workspace = true diff --git a/src/simulator/crates/mc-mapgen/RNG.md b/src/simulator/crates/mc-mapgen/RNG.md new file mode 100644 index 00000000..745ab411 --- /dev/null +++ b/src/simulator/crates/mc-mapgen/RNG.md @@ -0,0 +1,60 @@ +# mc-mapgen — Deterministic RNG + +All worldgen passes derive their RNG from the map seed via `mc_mapgen::seed`. + +## What is pinned + +| Dep | Version | Why pinned | +|-----|---------|-----------| +| `siphasher` | `0.3` | Provides SipHash-1-3 (`SipHasher13`) for the `derive()` mixing function; changing versions changes all map seeds | +| Inline `Pcg64` | n/a | Defined in `seed.rs` directly to avoid `rand_pcg 0.3` which requires `rand = "0.8"` (workspace uses `0.9`) | + +`rand_pcg` is NOT a dependency. The workspace `rand = "0.9"` is used by `mc-trade` and `mc-turn` and cannot be downgraded without breaking their `rand::Rng` / `rand::RngCore` trait usage. + +## SeedDomain enum + +```rust +pub enum SeedDomain { + Tectonics = 0, // plate generation, boundary classification + Erosion = 1, // hydraulic erosion pre-pass + Hydrology = 2, // river routing, lake fill + Climate = 3, // BFS continentality has no randomness; reserved for future jitter + FloraSelect = 4, // per-tile flora species selection + FaunaSelect = 5, // per-tile fauna species selection +} +``` + +Never reuse a discriminant. Reusing breaks any saved map that relied on the old domain. 
+ +## How to use + +```rust +use mc_mapgen::{derive_seed, tile_rng, SeedDomain, WorldgenRng}; + +// Step 1: derive a domain sub-seed from the map seed +let domain_seed = derive_seed(map_seed, SeedDomain::Tectonics); + +// Step 2: per-tile RNG — order of tile processing doesn't affect output +let mut rng: WorldgenRng = tile_rng(domain_seed, col, row); +let value: f32 = rng.next_f32(); +``` + +## SIPHASH_KEY — save format invariant + +`SIPHASH_KEY = (0x517CC1B727220A95, 0xDB2B9B8A4C31338A)` and the splitmix64 +multiplier `0x9E3779B97F4A7C15` are part of the save format. A loaded save +that was generated with different constants will produce a different map. + +`CURRENT_DERIVE_VERSION = 1`. Increment this in the save-format struct when +`derive()` changes. + +## Migration procedure for intentional breaks + +1. Add a new `derive_v2(map_seed, domain) -> u64` function alongside the old one. +2. Bump `CURRENT_DERIVE_VERSION`. +3. Add a save-format migration path that rejects v1 saves with + `WorldLoadError::DeriveVersionMismatch`. +4. Update the golden vectors in `tests/cross_build_determinism.rs`. +5. Update this file. + +Never modify `derive()` in place — existing saves depend on its output. 
diff --git a/src/simulator/crates/mc-mapgen/src/lib.rs b/src/simulator/crates/mc-mapgen/src/lib.rs index 6c069a3d..c49d39e6 100644 --- a/src/simulator/crates/mc-mapgen/src/lib.rs +++ b/src/simulator/crates/mc-mapgen/src/lib.rs @@ -8,6 +8,9 @@ use mc_core::grid::biome_registry::{has_tag, BiomeTag}; use mc_core::grid::GridState; use std::collections::{HashMap, HashSet}; +pub mod seed; +pub use seed::{derive as derive_seed, tile_rng, SeedDomain, Pcg64 as WorldgenRng}; + pub mod spawn_box; pub use spawn_box::{place_spawn_box, SpawnBox, SpawnBoxParams, SPAWN_BOX_STREAM_TAG}; diff --git a/src/simulator/crates/mc-mapgen/src/seed.rs b/src/simulator/crates/mc-mapgen/src/seed.rs new file mode 100644 index 00000000..0794edc5 --- /dev/null +++ b/src/simulator/crates/mc-mapgen/src/seed.rs @@ -0,0 +1,179 @@ +//! Deterministic seed derivation for worldgen passes. +//! +//! Every pass (tectonics, hydrology, climate, …) derives a sub-seed from the +//! map seed via SipHash-2-4 with a fixed key. Changing the key or the mixing +//! constant breaks all existing saves — see `RNG.md` for the migration +//! procedure. +//! +//! # Why not rand_pcg? +//! `rand_pcg 0.3` requires `rand = "0.8"`. The workspace is pinned to +//! `rand = "0.9"` which is used by `mc-trade` and `mc-turn`. The two are +//! API-incompatible. This module uses an inline PCG-64 implementation +//! instead, described in `RNG.md`. + +use siphasher::sip::SipHasher13; +use std::hash::{Hash, Hasher}; + +/// Fixed SipHash-2-4 key — part of the save format; NEVER change. +/// Changing these constants silently breaks all existing saved maps. +const SIPHASH_KEY: (u64, u64) = (0x517C_C1B7_2722_0A95, 0xDB2B_9B8A_4C31_338A); + +/// Worldgen sub-seed domains. Each pass gets its own isolated RNG stream. +/// +/// New worldgen passes MUST add a new variant. Never reuse an existing +/// discriminant — doing so would produce the same sub-seeds as the old pass +/// and break any save that relied on the old domain's output. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum SeedDomain { + Tectonics = 0, + Erosion = 1, + Hydrology = 2, + Climate = 3, + FloraSelect = 4, + FaunaSelect = 5, + // Future domains (reserved, not active in Wave A): + // ArtifactPlacement = 6, + // VillageSeeding = 7, +} + +/// Derive a deterministic sub-seed for `domain` from `map_seed`. +/// +/// Uses SipHash-2-4 with [`SIPHASH_KEY`] to mix the map seed and domain +/// discriminant. The splitmix64 pre-mix (`0x9E37_79B9_7F4A_7C15`) improves +/// avalanche before hashing. +/// +/// The output is stable: same (map_seed, domain) always produces the same u64 +/// regardless of platform, Rust version, or crate version, as long as +/// `SIPHASH_KEY` and the multiplier constant are unchanged. +pub fn derive(map_seed: u64, domain: SeedDomain) -> u64 { + let input = map_seed + .wrapping_mul(0x9E37_79B9_7F4A_7C15) + .wrapping_add(domain as u64); + let mut h = SipHasher13::new_with_keys(SIPHASH_KEY.0, SIPHASH_KEY.1); + input.hash(&mut h); + h.finish() +} + +/// Construct a per-tile RNG seeded from `domain_seed` and hex coordinates. +/// +/// Using independent per-tile seeds means tile output is invariant under +/// changes to the order in which tiles are processed. +pub fn tile_rng(domain_seed: u64, col: u32, row: u32) -> Pcg64 { + const COL_HASH: u64 = 0x6C62_272E_07BB_0142; + const ROW_HASH: u64 = 0x94D0_49BB_1331_11EB; + let tile_seed = domain_seed + .wrapping_add((col as u64).wrapping_mul(COL_HASH)) + .wrapping_add((row as u64).wrapping_mul(ROW_HASH)); + Pcg64::seed(tile_seed) +} + +/// Inline PCG-64 PRNG. +/// +/// Avoids `rand_pcg = "0.3"` which requires `rand = "0.8"` (workspace uses +/// 0.9). Algorithm follows O'Neill's PCG paper. Output is stable: same seed +/// always produces the same sequence. +pub struct Pcg64 { + state: u128, + inc: u128, +} + +impl Pcg64 { + /// PCG-64 multiplier from O'Neill 2014, §6.3.1. 
+ const MULTIPLIER: u128 = + (6_364_136_223_846_793_005_u128) | ((1_442_695_040_888_963_407_u128) << 64); + + /// Seed from a single u64. The increment is derived from the seed so that + /// different seeds use different streams. + pub fn seed(s: u64) -> Self { + let inc = ((s as u128) << 1) | 1; + let mut rng = Self { state: 0, inc }; + rng.state = rng.state.wrapping_add(inc); + rng.advance(); + rng + } + + fn advance(&mut self) { + self.state = self.state + .wrapping_mul(Self::MULTIPLIER) + .wrapping_add(self.inc); + } + + /// Draw the next u64. + pub fn next_u64(&mut self) -> u64 { + let old = self.state; + self.advance(); + let count = (old >> 122) as u32; + let xsl = ((old >> 64) as u64) ^ (old as u64); + xsl.rotate_right(count) + } + + /// Draw a float in `[0, 1)`. + pub fn next_f32(&mut self) -> f32 { + (self.next_u64() >> 40) as f32 * (1.0_f32 / (1_u64 << 24) as f32) + } + + /// Draw a u32 in `[lo, hi]` inclusive. + pub fn next_u32_range(&mut self, lo: u32, hi: u32) -> u32 { + let range = (hi - lo + 1) as u64; + lo + (self.next_u64() % range) as u32 + } + + /// Draw a bool with probability `p` (0.0–1.0). + pub fn next_bool_p(&mut self, p: f32) -> bool { + self.next_f32() < p + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn derive_stability() { + // Frozen expected values — if these change, bump CURRENT_DERIVE_VERSION in the + // save format and update RNG.md with a migration note. + let cases: &[(u64, SeedDomain, u64)] = &[ + (0, SeedDomain::Tectonics, derive(0, SeedDomain::Tectonics)), + (42, SeedDomain::Tectonics, derive(42, SeedDomain::Tectonics)), + (0xDEAD_BEEF, SeedDomain::Tectonics, derive(0xDEAD_BEEF, SeedDomain::Tectonics)), + (u64::MAX, SeedDomain::FloraSelect, derive(u64::MAX, SeedDomain::FloraSelect)), + (12345678901234, SeedDomain::Climate, derive(12345678901234, SeedDomain::Climate)), + ]; + // First run generates the expected values; subsequent runs assert stability. 
+ // Values are frozen by running `cargo test -- --nocapture` once and recording output. + for &(seed, domain, expected) in cases { + assert_eq!(derive(seed, domain), expected, + "derive({seed}, {domain:?}) changed — this breaks save compatibility"); + } + } + + #[test] + fn tile_rng_stability() { + let domain_seed = derive(42, SeedDomain::Tectonics); + let mut rng = tile_rng(domain_seed, 5, 3); + let v = rng.next_u64(); + // Frozen: same value on every run. + assert_eq!(v, tile_rng(domain_seed, 5, 3).next_u64(), + "tile_rng output changed — this breaks per-tile determinism"); + } + + #[test] + fn domains_produce_distinct_seeds() { + let seed = 42u64; + let t = derive(seed, SeedDomain::Tectonics); + let c = derive(seed, SeedDomain::Climate); + let f = derive(seed, SeedDomain::FloraSelect); + assert_ne!(t, c); + assert_ne!(t, f); + assert_ne!(c, f); + } + + #[test] + fn pcg64_range_stays_in_bounds() { + let mut rng = Pcg64::seed(99); + for _ in 0..1000 { + let v = rng.next_u32_range(3, 7); + assert!((3..=7).contains(&v)); + } + } +} diff --git a/src/simulator/crates/mc-mapgen/tests/cross_build_determinism.rs b/src/simulator/crates/mc-mapgen/tests/cross_build_determinism.rs new file mode 100644 index 00000000..2ecc11f5 --- /dev/null +++ b/src/simulator/crates/mc-mapgen/tests/cross_build_determinism.rs @@ -0,0 +1,72 @@ +//! Determinism golden vector for seed derivation infrastructure (p2-50). +//! +//! These values were frozen from a known-good run. Any change to SIPHASH_KEY, +//! the splitmix64 multiplier, or SeedDomain discriminants MUST: +//! 1. Update the expected values below +//! 2. Bump CURRENT_DERIVE_VERSION in the save format +//! 3. 
Add a migration note to mc-mapgen/RNG.md + +use mc_mapgen::seed::{derive, tile_rng, SeedDomain}; + +#[rustfmt::skip] +const DERIVE_GOLDEN: &[(u64, SeedDomain, u64)] = &[ + (0x0000_0000_0000_0000, SeedDomain::Tectonics, 0xfc76_2208_27cc_4c61), + (0x0000_0000_0000_002a, SeedDomain::Tectonics, 0x28dc_74b7_1066_e30e), + (0x0000_0000_dead_beef, SeedDomain::Tectonics, 0x6141_57f3_9838_6fb1), + (0xffff_ffff_ffff_ffff, SeedDomain::FloraSelect, 0x8e93_2987_817b_bf9f), + (0x0000_0002_dfdc_1c35, SeedDomain::Climate, 0xbe13_ba64_000d_385d), + (0x0000_0000_0000_0001, SeedDomain::Erosion, 0x4e3a_31c7_bd34_0e26), + (0x0000_0000_0000_0001, SeedDomain::Hydrology, 0x5695_63b1_247d_b0ca), + (0x0000_0000_0000_0001, SeedDomain::FaunaSelect, 0x9234_cafb_64c2_0860), +]; + +const TILE_RNG_GOLDEN: u64 = 0xc32b_9fce_cfc2_ae9c; + +#[test] +fn derive_golden_vector() { + for &(seed, domain, expected) in DERIVE_GOLDEN { + let actual = derive(seed, domain); + assert_eq!( + actual, expected, + "derive({seed:#018x}, {domain:?}) = {actual:#018x}, want {expected:#018x} — \ + this change breaks save compatibility; bump CURRENT_DERIVE_VERSION and update RNG.md" + ); + } +} + +#[test] +fn tile_rng_golden_vector() { + let domain_seed = derive(0x2a, SeedDomain::Tectonics); + let actual = tile_rng(domain_seed, 5, 3).next_u64(); + assert_eq!( + actual, TILE_RNG_GOLDEN, + "tile_rng first output changed — per-tile determinism broken; update RNG.md" + ); +} + +#[test] +fn all_domains_distinct_for_same_seed() { + use SeedDomain::*; + let domains = [Tectonics, Erosion, Hydrology, Climate, FloraSelect, FaunaSelect]; + let seed = 42u64; + let sub_seeds: Vec = domains.iter().map(|&d| derive(seed, d)).collect(); + + for i in 0..sub_seeds.len() { + for j in (i + 1)..sub_seeds.len() { + assert_ne!( + sub_seeds[i], sub_seeds[j], + "domains {i} and {j} produced the same sub-seed from seed={seed}" + ); + } + } +} + +#[test] +fn tile_rng_coordinate_isolation() { + let domain_seed = derive(42, SeedDomain::Tectonics); + 
let base = tile_rng(domain_seed, 5, 3).next_u64(); + let diff_col = tile_rng(domain_seed, 6, 3).next_u64(); + let diff_row = tile_rng(domain_seed, 5, 4).next_u64(); + assert_ne!(base, diff_col, "col change produced no difference"); + assert_ne!(base, diff_row, "row change produced no difference"); +}