diff --git a/public/games/age-of-dwarves/data/sim-scenarios/fullgame/game1_headless_systems_150t.json b/public/games/age-of-dwarves/data/sim-scenarios/fullgame/game1_headless_systems_150t.json index c4133048..e60367f9 100644 --- a/public/games/age-of-dwarves/data/sim-scenarios/fullgame/game1_headless_systems_150t.json +++ b/public/games/age-of-dwarves/data/sim-scenarios/fullgame/game1_headless_systems_150t.json @@ -2,7 +2,7 @@ "id": "game1_headless_systems_150t", "kind": "fullgame", "version": 1, - "description": "Broad Game-1 systems run: 4 clans, full evolved map, exercising all systems. Terminates ~120t under these rules (victory). Regression umbrella (calibrated).", + "description": "Broad Game-1 systems run: 4 clans, full evolved map, exercising all systems (tech web + research, trade from owned-tile resources, culture borders, climate/ecology/healing). With the content packs boot-loaded a winner now emerges ~98-113t (victory) instead of running flat to the cap. Regression umbrella (calibrated to the real all-systems run).", "map": { "size": 40, "evolution_ticks": 14000, "seed_base": 150150 }, "players": [ { "personality": "militarist" }, { "personality": "boom" }, @@ -12,9 +12,11 @@ "seeds": [150150, 150151, 150152], "expect": [ { "type": "terminates" }, - { "type": "final_turn", "op": ">=", "value": 120 }, + { "type": "final_turn", "op": ">=", "value": 90 }, { "type": "no_nan_economy" }, { "type": "population_non_negative" }, - { "type": "total_pvp_combats", "op": ">=", "value": 0 } + { "type": "total_pvp_combats", "op": ">=", "value": 0 }, + { "type": "median_tier_peak", "op": ">=", "value": 4 }, + { "type": "trades_formed", "op": ">=", "value": 1 } ] } diff --git a/src/simulator/crates/mc-sim/src/bin/sim_scenario.rs b/src/simulator/crates/mc-sim/src/bin/sim_scenario.rs index 6d901486..f974250e 100644 --- a/src/simulator/crates/mc-sim/src/bin/sim_scenario.rs +++ b/src/simulator/crates/mc-sim/src/bin/sim_scenario.rs @@ -208,9 +208,22 @@ struct BatchResult { seeds_run: usize, passed_seeds: usize, results: Vec, + /// Assertions evaluated once across the whole seed batch rather than + /// per-seed (e.g. `clan_winrate_max`, a property of the win distribution). + #[serde(skip_serializing_if = "Vec::is_empty")] + batch_assertions: Vec, overall_pass: bool, } +#[derive(Debug, Serialize)] +struct BatchAssertion { + label: String, + passed: bool, + /// The measured value the assertion was checked against (for transparency + /// in the JSON output — e.g. the observed max clan winrate). + measured: f64, +} + // ───────────────────────────── Helpers ───────────────────────────── fn cmp(actual: f64, op: &str, target: f64) -> bool { @@ -608,6 +621,11 @@ fn drive_fullgame( map_seed: seed, ..Default::default() }; + // Boot-load the resource→category map (luxury/strategic/bonus) the live + // harness derives from `resources.json`. `process_trade_phase` sources + // tradeable surpluses from owned-tile collectibles classified by this map; + // empty → nothing ever trades (the runner's previous behaviour). + state.resource_categories = load_resource_categories(); let n = sc.players.len().max(1); for i in 0..n { let base_col = 6 + (i as i32 * 4); @@ -631,7 +649,18 @@ fn drive_fullgame( state.players.push(ps); } - let processor = TurnProcessor::new(max_turns); + // Boot-load the tech web exactly as the live harness does + // (`GdPlayerApi::set_tech_web_json`). Without it `process_science` runs + // research-less (tier-1 fallback) and the strategic systems — tech, tiers, + // the unlocks that gate trades — never progress, leaving the + // median_tier_peak / trades_formed assertions un-evaluable. Loading it is + // what makes the fullgame runner exercise "all systems" for real. + let web_json = load_tech_web_json(); + let tier_map = tech_tier_map(&web_json); + let mut processor = TurnProcessor::new(max_turns); + if let Err(e) = processor.set_tech_web_json(&web_json) { + eprintln!("# WARN: tech web load failed ({e}); running research-less"); + } let mut inv = Invariants { no_nan_economy: true, population_non_negative: true, @@ -641,6 +670,7 @@ fn drive_fullgame( let mut total_pvp = 0u32; let mut prev_turn = state.turn; let mut peak_cities: Vec = vec![0; n]; + let mut winner_pi: Option = None; for _ in 1..=max_turns { let result = processor.step(&mut state); @@ -664,7 +694,8 @@ fn drive_fullgame( peak_cities[i] = peak_cities[i].max(p.cities.len()); } } - if result.winner.is_some() { + if let Some((w, _)) = result.winner { + winner_pi = Some(w); inv.terminated = true; break; } @@ -679,11 +710,126 @@ fn drive_fullgame( for (i, c) in peak_cities.iter().enumerate() { metrics.insert(format!("peak_cities_p{i}"), serde_json::json!(c)); } + + // Strategic-system metrics. Each maps to a real engine field so the + // formerly-skipped assertions (median_tier_peak / trades_formed / + // border_growth / clan_winrate) evaluate against the actual run. + // • trades_formed — luxuries+strategics that reached a player's ledger + // via `process_trade_phase`. + // • tier_peak_p{i} — max tier among the player's researched techs + // (`process_science` ↦ PlayerTechState); median across players feeds + // `median_tier_peak`. + // • owned_tiles_p{i} — culture-claimed territory (`process_culture` + // border expansion). Cities start with empty `owned_tiles`, so the + // final count IS the growth from baseline. + let mut trades_formed = 0u64; + let mut tier_peaks: Vec = Vec::with_capacity(n); + for (i, p) in state.players.iter().enumerate() { + trades_formed += (p.traded_luxuries.len() + p.traded_strategics.len()) as u64; + let peak = p + .player_tech + .as_ref() + .map(|pt| { + pt.researched_techs() + .iter() + .filter_map(|id| tier_map.get(id).copied()) + .max() + .unwrap_or(0) + }) + .unwrap_or(0); + tier_peaks.push(peak); + metrics.insert(format!("tier_peak_p{i}"), serde_json::json!(peak)); + let owned: usize = p.cities.iter().map(|c| c.owned_tiles.len()).sum(); + metrics.insert(format!("owned_tiles_p{i}"), serde_json::json!(owned)); + } + let median_tier = { + let mut v = tier_peaks; + v.sort_unstable(); + if v.is_empty() { + 0 + } else { + v[v.len() / 2] + } + }; + metrics.insert("trades_formed".into(), serde_json::json!(trades_formed)); + metrics.insert("median_tier_peak".into(), serde_json::json!(median_tier)); + if let Some(w) = winner_pi { + metrics.insert("winner".into(), serde_json::json!(w)); + } + metrics.insert("end_state_hash".into(), serde_json::json!(hash_state(&state))); (state.turn, metrics, inv) } +/// Concatenate every `public/resources/techs/*.json` pillar (each a JSON array +/// of tech definitions) into one flat array — the same payload the live harness +/// feeds `set_tech_web_json`. Deterministic order (sorted paths) so the run is +/// reproducible across machines. +fn load_tech_web_json() -> String { + let dir = repo_root().join("public/resources/techs"); + let mut all: Vec = Vec::new(); + if let Ok(entries) = fs::read_dir(&dir) { + let mut paths: Vec = entries + .filter_map(|e| e.ok().map(|e| e.path())) + .filter(|p| p.extension().is_some_and(|x| x == "json")) + .collect(); + paths.sort(); + for p in paths { + if let Ok(text) = fs::read_to_string(&p) { + if let Ok(serde_json::Value::Array(defs)) = serde_json::from_str(&text) { + all.extend(defs); + } + } + } + } + serde_json::to_string(&all).unwrap_or_else(|_| "[]".to_string()) +} + +/// Flatten `public/resources/resources.json` into the `{resource_id → +/// category}` map the live harness loads via `set_resource_categories_json`. +/// Top-level keys `bonus` / `luxury` / `strategic` each hold an array of +/// resource entries; the key IS the category. Reads the same canonical pack +/// the live game does (no hardcoded values). +fn load_resource_categories() -> BTreeMap { + let path = repo_root().join("public/resources/resources.json"); + let mut map = BTreeMap::new(); + let Ok(text) = fs::read_to_string(&path) else { + return map; + }; + let Ok(root) = serde_json::from_str::(&text) else { + return map; + }; + for category in ["bonus", "luxury", "strategic"] { + if let Some(arr) = root.get(category).and_then(serde_json::Value::as_array) { + for entry in arr { + if let Some(id) = entry.get("id").and_then(serde_json::Value::as_str) { + map.insert(id.to_string(), category.to_string()); + } + } + } + } + map +} + +/// tech id → tier, parsed from the concatenated tech-web JSON. Backs the +/// `median_tier_peak` metric (a player's peak tier = max tier of its +/// researched techs). +fn tech_tier_map(web_json: &str) -> BTreeMap { + let mut m = BTreeMap::new(); + if let Ok(serde_json::Value::Array(defs)) = serde_json::from_str::(web_json) { + for d in defs { + if let (Some(id), Some(tier)) = ( + d.get("id").and_then(serde_json::Value::as_str), + d.get("tier").and_then(serde_json::Value::as_u64), + ) { + m.insert(id.to_string(), tier as u32); + } + } + } + m +} + fn hash_state(state: &GameState) -> u64 { let json = serde_json::to_string(state).expect("serialize state"); let mut h = DefaultHasher::new(); @@ -727,11 +873,21 @@ fn eval_fullgame(res: &mut SeedResult, sc: &Scenario, seed: u64, final_turn: u32 Assertion::CityCount { player, op, value } => { Some(cmp(m_u64(&format!("peak_cities_p{player}")) as f64, op, *value)) } - // Require real strategic AI play (not available headless yet) — skip honestly. - Assertion::MedianTierPeak { .. } - | Assertion::TradesFormed { .. } - | Assertion::BorderGrowth { .. } - | Assertion::ClanWinrateMax { .. } => None, + // Strategic systems now run for real in the headless fullgame + // (tech web boot-loaded → research/tiers; trade + culture phases + // active), so these evaluate against the actual run. + Assertion::MedianTierPeak { op, value } => { + Some(cmp(m_u64("median_tier_peak") as f64, op, *value)) + } + Assertion::TradesFormed { op, value } => { + Some(cmp(m_u64("trades_formed") as f64, op, *value)) + } + Assertion::BorderGrowth { player, op, value } => { + Some(cmp(m_u64(&format!("owned_tiles_p{player}")) as f64, op, *value)) + } + // clan_winrate_max is inherently a batch property (win fraction + // across the seed set), evaluated once in main(), not per seed. + Assertion::ClanWinrateMax { .. } => None, _ => None, }; match outcome { @@ -775,6 +931,38 @@ fn parse_seeds(sc: &Scenario, args: &[String]) -> Vec { vec![base, base + 1, base + 2] } +/// Evaluate assertions that are properties of the whole seed batch rather than +/// a single run. Currently `clan_winrate_max`: the fraction of games won by the +/// most-winning player must stay at/below the band (fairness). Reads the +/// per-seed `winner` metric recorded by the fullgame driver. +fn eval_batch_assertions(sc: &Scenario, results: &[SeedResult]) -> Vec { + let mut out = Vec::new(); + for a in &sc.expect { + if let Assertion::ClanWinrateMax { op, value } = a { + let mut wins: BTreeMap = BTreeMap::new(); + let mut decided = 0u32; + for r in results { + if let Some(w) = r.metrics.get("winner").and_then(serde_json::Value::as_u64) { + *wins.entry(w).or_insert(0) += 1; + decided += 1; + } + } + // Undecided batch (no winners) → vacuously fair (max winrate 0). + let max_winrate = if decided == 0 { + 0.0 + } else { + wins.values().copied().max().unwrap_or(0) as f64 / decided as f64 + }; + out.push(BatchAssertion { + label: format!("{a:?}"), + passed: cmp(max_winrate, op, *value), + measured: max_winrate, + }); + } + } + out +} + fn main() { let args: Vec = env::args().collect(); if args.len() < 2 { @@ -799,7 +987,15 @@ fn main() { .collect(); let passed = results.iter().filter(|r| r.failed.is_empty()).count(); - let overall = passed == results.len() && !results.is_empty(); + let seeds_passed = passed == results.len() && !results.is_empty(); + + // Batch-level assertions: properties of the whole seed distribution, not a + // single run. `clan_winrate_max` is the win fraction of the most-winning + // player across the batch (winner index recorded per fullgame seed). + let batch_assertions = eval_batch_assertions(&sc, &results); + let batch_ok = batch_assertions.iter().all(|a| a.passed); + let overall = seeds_passed && batch_ok; + let batch = BatchResult { scenario_id: sc.id.clone(), kind: sc.kind.clone(), @@ -807,13 +1003,19 @@ fn main() { seeds_run: results.len(), passed_seeds: passed, results, + batch_assertions, overall_pass: overall, }; println!("{}", serde_json::to_string_pretty(&batch).unwrap()); + let failed_batch: Vec<&BatchAssertion> = + batch.batch_assertions.iter().filter(|a| !a.passed).collect(); if overall { eprintln!("# SCENARIO PASS: {}/{} seeds — {}", passed, batch.seeds_run, sc.id); } else { eprintln!("# SCENARIO FAIL: {}/{} seeds — {}", passed, batch.seeds_run, sc.id); + for a in failed_batch { + eprintln!("# batch assertion failed: {} (measured {:.3})", a.label, a.measured); + } std::process::exit(1); } } diff --git a/src/simulator/crates/mc-tech/src/state.rs b/src/simulator/crates/mc-tech/src/state.rs index ee9d862b..b474f43f 100644 --- a/src/simulator/crates/mc-tech/src/state.rs +++ b/src/simulator/crates/mc-tech/src/state.rs @@ -42,11 +42,35 @@ pub enum ResearchResult { /// Mutable per-player research state. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct PlayerTechState { + // `HashSet` iteration order is non-deterministic, which would make any + // serialization of a `GameState` carrying this set (e.g. the sim-scenario + // `end_state_hash` determinism check) differ run-to-run despite identical + // logical state. A researched-set has no meaningful order, so serialize it + // sorted — canonical, deterministic, and transparent to every caller (the + // in-memory type and `researched_techs()` accessor are unchanged). + #[serde(serialize_with = "serialize_sorted_set")] researched: HashSet, researching: Option, research_progress: u32, } +/// Serialize a `HashSet` as a sorted JSON array so the output is +/// deterministic regardless of hash iteration order. Deserialization uses the +/// default `HashSet` path (order-independent on the way in). +fn serialize_sorted_set(set: &HashSet, serializer: S) -> Result +where + S: serde::Serializer, +{ + use serde::ser::SerializeSeq; + let mut sorted: Vec<&String> = set.iter().collect(); + sorted.sort_unstable(); + let mut seq = serializer.serialize_seq(Some(sorted.len()))?; + for item in sorted { + seq.serialize_element(item)?; + } + seq.end() +} + impl PlayerTechState { /// Create a new empty player tech state. pub fn new() -> Self {