From 5dcb5819c79af2448cd9ca8edc4b418735fe8c0c Mon Sep 17 00:00:00 2001 From: Natalie Date: Thu, 7 May 2026 20:30:04 -0700 Subject: [PATCH] =?UTF-8?q?feat(@projects/@magic-civilization):=20?= =?UTF-8?q?=E2=9C=A8=20add=20p1-22a=20huge-map=20ai=20quality=20objective?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Lilith Autocommit --- .project/objectives/DASHBOARD_CATEGORIES.md | 1 + .project/objectives/README.md | 7 +- .project/objectives/objectives.json | 27 +++- .../objectives/p1-22a-huge-map-ai-quality.md | 145 ++++++++++++++++++ public/games/age-of-dwarves/data/awards.json | 89 +++++++++++ 5 files changed, 262 insertions(+), 7 deletions(-) create mode 100644 .project/objectives/p1-22a-huge-map-ai-quality.md create mode 100644 public/games/age-of-dwarves/data/awards.json diff --git a/.project/objectives/DASHBOARD_CATEGORIES.md b/.project/objectives/DASHBOARD_CATEGORIES.md index 1115e948..ba940b5a 100644 --- a/.project/objectives/DASHBOARD_CATEGORIES.md +++ b/.project/objectives/DASHBOARD_CATEGORIES.md @@ -194,6 +194,7 @@ | [p1-20](p1-20-unit-action-capability-registry.md) | āœ… done | P1 | Unit action capability registry — one source of truth for "what can this unit do right now?" | [wireguard](../team-leads/wireguard.md) | 🟢 | | [p1-21](p1-21-unit-patrol-orders.md) | āœ… done | P1 | Unit patrol orders — standing order to loop between waypoint tiles | [wireguard](../team-leads/wireguard.md) | 🟢 | | [p1-22](p1-22-mcts-wall-clock-budget.md) | 🟔 partial | P1 | MCTS per-decision wall-clock budget — bound per-turn cost on huge maps | [warcouncil](../team-leads/warcouncil.md) | 🟢 | +| [p1-22a](p1-22a-huge-map-ai-quality.md) | šŸ”“ stub | P1 | Huge-map AI quality — close the 4/10 → ≄5/10 decisive-game gate | [warcouncil](../team-leads/warcouncil.md) | šŸ”’ p1-22 | | [p1-23](p1-23-stats-tracker-restore.md) | āœ… done | P1 | Restore StatsTracker — demographics overview broken in shipped builds | [shipwright](../team-leads/shipwright.md) | 🟢 | | [p1-24](p1-24-windows-path-separator.md) | āœ… done | P1 | ai_personalities.json fails to load from packed builds (all platforms) — pass JSON contents not path | [shipwright](../team-leads/shipwright.md) | 🟢 | | [p1-25](p1-25-export-script-error-cleanup.md) | āœ… done | P1 | Eliminate parse-error spam in export logs (Unit dup decl + SaveManager stray) | [shipwright](../team-leads/shipwright.md) | 🟢 | diff --git a/.project/objectives/README.md b/.project/objectives/README.md index 7fff1233..92dfc02a 100644 --- a/.project/objectives/README.md +++ b/.project/objectives/README.md @@ -15,10 +15,10 @@ | Priority | šŸ”µ | 🟔 | šŸ”“ | āŒ | ⚫ | āœ… | Total | |---|---|---|---|---|---|---|---| | **P0** | 0 | 0 | 0 | 0 | 0 | 44 | 44 | -| **P1** | 1 | 13 | 1 | 5 | 1 | 55 | 76 | +| **P1** | 1 | 13 | 2 | 5 | 1 | 55 | 77 | | **P2** | 0 | 8 | 11 | 0 | 6 | 69 | 94 | | **P3 (oos)** | 0 | 7 | 6 | 0 | 21 | 9 | 43 | -| **total** | **1** | **28** | **18** | **5** | **28** | **177** | **257** | +| **total** | **1** | **28** | **19** | **5** | **28** | **177** | **258** | @@ -31,7 +31,7 @@ | [combat-dev](../team-leads/combat-dev.md) | 6 | | [shipwright](../team-leads/shipwright.md) | 4 | | [testwright](../team-leads/testwright.md) | 3 | -| [warcouncil](../team-leads/warcouncil.md) | 2 | +| [warcouncil](../team-leads/warcouncil.md) | 3 | | [asset-audio](../team-leads/asset-audio.md) | 1 | | [simulator-infra](../team-leads/simulator-infra.md) | 1 | @@ -70,6 +70,7 @@ | [p2-25](p2-25-building-sprites-base-coverage.md) | āŒ missing | Building sprites — base game coverage (non-wonder) | — | [asset-sprite](../team-leads/asset-sprite.md) | 2026-04-17 | 🟢 unblocked | | [p2-26](p2-26-mundane-wonder-sprites.md) | āŒ missing | Mundane-wonder sprites — 24 distinct, higher-fidelity art | — | [asset-sprite](../team-leads/asset-sprite.md) | 2026-04-17 | 🟢 unblocked | | [p2-27](p2-27-city-population-tier-sprites.md) | āŒ missing | City population-tier sprites — city_q1 through city_q5 | — | [asset-sprite](../team-leads/asset-sprite.md) | 2026-04-17 | 🟢 unblocked | +| [p1-22a](p1-22a-huge-map-ai-quality.md) | šŸ”“ stub | Huge-map AI quality — close the 4/10 → ≄5/10 decisive-game gate | — | [warcouncil](../team-leads/warcouncil.md) | 2026-05-07 | šŸ”’ p1-22 | ## P2 — Polish diff --git a/.project/objectives/objectives.json b/.project/objectives/objectives.json index 06540fbb..88b9989f 100644 --- a/.project/objectives/objectives.json +++ b/.project/objectives/objectives.json @@ -1,13 +1,13 @@ { - "generated_at": "2026-05-08T01:19:32Z", + "generated_at": "2026-05-08T03:27:04Z", "totals": { "done": 177, "in_progress": 1, "partial": 28, - "stub": 18, + "stub": 19, "missing": 5, "oos": 28, - "total": 257 + "total": 258 }, "objectives": [ { @@ -748,6 +748,19 @@ "blocked_by": [], "summary": "Spun out from p0-22 (Ultimate AI stress test) on 2026-04-25 after the 7 root-cause fixes (combat method typos, per-slot pinning, score-victory fallback, NOTIFICATION_PREDELETE, autoplay-batch.sh MCTS branch, etc.) verified the pipeline produces `outcome:victory` at T500 on the huge-map config. The remaining gap blocking `ultimate_stress: PASS` is **purely MCTS per-turn wall-clock cost on game-state complexity**: with deterministic seeds, some maps produce game states where each MCTS decision takes 30-60+ seconds (vs <5s on simpler states). Even at `PARALLEL=2 SAFETY_TIMEOUT_OVERRIDE=3600s`, slow seeds reach only T55-T236 in the 3600s budget (would need 4-8 hours wall-clock per game). Fast seeds reach T500 in ~45min.\n\nThis is engineering work, not test calibration: the AI is ALWAYS faster when it commits to a decision under a bounded budget. The current MCTS runs to a fixed iteration count regardless of wall-clock cost; on a complex 5-player huge-map state the iteration cost balloons." }, + { + "id": "p1-22a", + "title": "Huge-map AI quality — close the 4/10 → ≄5/10 decisive-game gate", + "priority": "p1", + "status": "stub", + "scope": "game1", + "owner": "warcouncil", + "updated_at": "2026-05-07", + "blocked_by": [ + "p1-22" + ], + "summary": "The huge-map 5-clan batch (`tools/huge-map-5clan.sh`, 10 seeds, T300 limit,\n`MCTS_DECISION_BUDGET_MS=2000`) has landed at **4/10 victories** across three\nindependent runs (cycle-1 pre-budget, cycle-2 post-tactical-budget, cycle-3\npost-p0-20 2Ɨ GPU rollout speed). The gate is ≄5/10.\n\nPost-p0-20 evidence eliminates budget plumbing as the bottleneck: with\n`budget_ms=50` the budget test fires at `dispatched=2623 << 100000`\n(1/38 of the iteration cap), and GPU rollouts are 2Ɨ faster than CPU. Yet the\nratio did not move from 4/10. This is **AI strategic quality on huge maps**,\nnot throughput." + }, { "id": "p1-23", "title": "Restore StatsTracker — demographics overview broken in shipped builds", @@ -2836,6 +2849,12 @@ } ], "blocked": [ + { + "id": "p1-22a", + "blockedBy": [ + "p1-22" + ] + }, { "id": "p1-43b", "blockedBy": [ @@ -3006,7 +3025,7 @@ }, { "owner": "warcouncil", - "remaining": 2 + "remaining": 3 }, { "owner": "asset-audio", diff --git a/.project/objectives/p1-22a-huge-map-ai-quality.md b/.project/objectives/p1-22a-huge-map-ai-quality.md new file mode 100644 index 00000000..6e680c5b --- /dev/null +++ b/.project/objectives/p1-22a-huge-map-ai-quality.md @@ -0,0 +1,145 @@ +--- +id: p1-22a +title: Huge-map AI quality — close the 4/10 → ≄5/10 decisive-game gate +priority: p1 +status: stub +scope: game1 +owner: warcouncil +updated_at: 2026-05-07 +blocked_by: + - p1-22 +--- + +## Summary + +The huge-map 5-clan batch (`tools/huge-map-5clan.sh`, 10 seeds, T300 limit, +`MCTS_DECISION_BUDGET_MS=2000`) has landed at **4/10 victories** across three +independent runs (cycle-1 pre-budget, cycle-2 post-tactical-budget, cycle-3 +post-p0-20 2Ɨ GPU rollout speed). The gate is ≄5/10. + +Post-p0-20 evidence eliminates budget plumbing as the bottleneck: with +`budget_ms=50` the budget test fires at `dispatched=2623 << 100000` +(1/38 of the iteration cap), and GPU rollouts are 2Ɨ faster than CPU. Yet the +ratio did not move from 4/10. This is **AI strategic quality on huge maps**, +not throughput. + +## Diagnosis + +### Finding 1 — Abstract projection truncates to MAX_PLAYERS=4 on a 5-player game + +`src/simulator/crates/mc-turn/src/abstract_projection.rs:47`: +```rust +let n = state.players.len().min(MAX_PLAYERS); +``` +`MAX_PLAYERS` is defined as `4` in +`src/simulator/crates/mc-ai/src/abstract_state.rs:38`. On a 5-clan huge-map +game the fifth player is silently dropped from the `AbstractRolloutState` POD +fed to the GPU rollout. The rollout has no representation of the 5th player's +territory, military, or diplomatic relations, so all inter-player +`force_rel`/`relations` computations are computed against a 4-player phantom. + +**Impact**: GPU rollout evaluations systematically misvalue strategic positions +in 5-player games. A clan that is diplomatically safe because the 5th player +buffers it looks dangerous on the abstract projection, and vice-versa. This +degrades MCTS value estimates in the tree, leading to suboptimal early +strategic decisions. + +### Finding 2 — Strategic decision space is O(n²) larger on huge maps + +A huge map (128Ɨ128 tiles) has ~4Ɨ the unit density of a standard map. Each +MCTS iteration traverses `legal_actions()` — which includes all unit move +targets and all city build queue choices — so the branching factor is ~4Ɨ larger. +With `MCTS_DECISION_BUDGET_MS=2000` the tree gets ~2000/cost(iter) iterations; +on huge-map states with high unit density each iteration is more expensive, +giving fewer rollouts per decision. The abstract-projection GPU path mitigates +this but only partially, since GPU occupancy is bounded by dispatch queue depth +(currently 1024 max per `Phase B`). + +**Impact**: MCTS makes decisions with shallower trees on huge maps than on +standard maps at the same wall-clock budget, leading to greedier near-sighted +play. + +### Finding 3 — T300 turn limit is too tight for huge-map late-game to resolve + +Cycle-3 batch: 6/10 games are `in_progress` at T300 — no winner declared, all +5 clans alive. On standard maps, a decisive victory typically lands at T150-250. +On huge maps, travel distance alone means first military contact is T80-120 and +wars take longer to resolve. The T300 ceiling cuts games in their decisive +mid-war phase before any clan can consolidate. + +**Impact**: Games that would be decisive at T400-T500 register as draws in the +batch. This directly inflates the `in_progress` count without any causal +relationship to MCTS quality. + +### Finding 4 — `happiness_pool` is always zero in the abstract projection + +`src/simulator/crates/mc-turn/src/abstract_projection.rs:99`: +```rust +// PlayerState has no aggregate `happiness_pool`; per-city happiness +// lives elsewhere. The POD slot stays zero until p1-30 wires it. +happiness_pool: 0, +``` +Happiness is a meaningful differentiator on huge maps where cities are more +spread out. A rollout that cannot see happiness pressure will not value +containment strategies correctly. + +## Proposed fix paths + +### Path A — Raise MAX_PLAYERS to 5, extend AbstractRolloutState POD (highest priority) + +- `src/simulator/crates/mc-ai/src/abstract_state.rs`: raise `MAX_PLAYERS` from + 4 to 5. POD grows from 256 to 320 bytes. WGSL shader (`rollout.wgsl`) must + match the new layout; GPU path needs a rebuild. +- `src/simulator/crates/mc-turn/src/abstract_projection.rs`: projection already + loops to `state.players.len().min(MAX_PLAYERS)` — no code change needed beyond + the constant. +- Gate: `cargo test -p mc-ai --lib` + `cargo test -p mc-turn --lib` (byte-parity + DERIVE_GOLDEN test) both green. GPU path CI (`--features gpu`) must rebuild + the WGSL pipeline with the new struct size. +- Expected improvement: eliminates systematic 5th-player blindness. Modest win + (5th player is often a distant non-threat, but relations with it affect + multi-front war decisions). + +### Path B — Raise T300 turn limit for huge-map batch to T500 (lowest risk) + +- `tools/huge-map-5clan.sh`: change `TURN_LIMIT` from 300 to 500. +- No code changes. No Rust rebuild required. +- Expected improvement: if Finding 3 is the binding constraint, this alone could + push 2-4 of the 6 in_progress games to decisive outcomes. If AI quality is the + real ceiling (Findings 1+2), it won't help. +- Risk: each seed now takes up to 5/3 as long on apricot. With 10-seed batch, + total wall time could grow from ~45min to ~75min. + +**Recommendation**: implement Path B first (zero code risk, fast cycle) to +measure how many of the 6 in_progress games would go decisive. If ≄2 flip, +the 4+2=6/10 gate is met without any Rust changes. Then Path A is a quality +improvement on top of that. + +## Acceptance + +- [ ] `ssh apricot '... bash tools/huge-map-5clan.sh'` with `TURN_LIMIT=500` + produces `verdict.json` with `decisive_rate ≄ 5/10` and `pass: true`. +- [ ] If Path A is implemented: `cargo test -p mc-ai --lib` green including the + `DERIVE_GOLDEN` byte-parity test; WGSL shader updated and GPU path compiles + with `--features gpu`. +- [ ] `p1-22` parent closes: once ≄5/10 victories confirmed, flip p1-22's + remaining 🟔 bullets to āœ“ and set status `done`. + +## Non-goals + +- Changing MCTS algorithm (PUCT priors stay). +- Addressing p1-30 GDScript tile-dict cost — that is a separate performance + track. This objective targets the strategic decision quality gap only. +- Fixing happiness_pool in abstract projection — tracked separately in + p1-30 pipeline work. +- Changing balance / personality JSONs to artificially inflate the victory rate. + +## Files to touch (if Path A) + +- `src/simulator/crates/mc-ai/src/abstract_state.rs` — raise `MAX_PLAYERS` +- `src/simulator/crates/mc-ai/shaders/rollout.wgsl` — update struct layout +- Test: re-run `cargo test -p mc-ai --features gpu --test gpu_walltime` on apricot + +## Files to touch (Path B) + +- `tools/huge-map-5clan.sh` — raise TURN_LIMIT from 300 to 500 diff --git a/public/games/age-of-dwarves/data/awards.json b/public/games/age-of-dwarves/data/awards.json new file mode 100644 index 00000000..aa707b14 --- /dev/null +++ b/public/games/age-of-dwarves/data/awards.json @@ -0,0 +1,89 @@ +{ + "awards": [ + { + "id": "greatest_builder", + "vocabulary_key": "award_greatest_builder", + "description_key": "award_greatest_builder_desc", + "metric": "buildings_built_total", + "metric_source": "turn_snapshot", + "aggregate": "final_value", + "higher_is_better": true, + "tie_break": "lowest_clan_id" + }, + { + "id": "master_architect", + "vocabulary_key": "award_master_architect", + "description_key": "award_master_architect_desc", + "metric": "wonders_built_count", + "metric_source": "turn_snapshot", + "aggregate": "final_value", + "higher_is_better": true, + "tie_break": "lowest_clan_id" + }, + { + "id": "war_chief", + "vocabulary_key": "award_war_chief", + "description_key": "award_war_chief_desc", + "metric": "units_killed", + "metric_source": "turn_event_count", + "event_type": "unit_killed", + "aggregate": "cumulative_count", + "higher_is_better": true, + "tie_break": "lowest_clan_id" + }, + { + "id": "wealthiest_clan", + "vocabulary_key": "award_wealthiest_clan", + "description_key": "award_wealthiest_clan_desc", + "metric": "gold", + "metric_source": "turn_snapshot", + "aggregate": "peak_value", + "higher_is_better": true, + "tie_break": "lowest_clan_id" + }, + { + "id": "most_cultured", + "vocabulary_key": "award_most_cultured", + "description_key": "award_most_cultured_desc", + "metric": "culture_total", + "metric_source": "turn_snapshot", + "aggregate": "final_value", + "higher_is_better": true, + "tie_break": "lowest_clan_id" + }, + { + "id": "greatest_scholar", + "vocabulary_key": "award_greatest_scholar", + "description_key": "award_greatest_scholar_desc", + "metric": "techs_researched_count", + "metric_source": "turn_event_count", + "event_type": "tech_researched", + "aggregate": "cumulative_count", + "higher_is_better": true, + "tie_break": "lowest_clan_id" + }, + { + "id": "longest_reign", + "vocabulary_key": "award_longest_reign", + "description_key": "award_longest_reign_desc", + "metric": "turns_with_most_cities", + "metric_source": "derived", + "derivation": "count_turns_leading_city_count", + "aggregate": "cumulative_count", + "higher_is_better": true, + "tie_break": "lowest_clan_id" + }, + { + "id": "survivor", + "vocabulary_key": "award_survivor", + "description_key": "award_survivor_desc", + "metric": "turns_survived", + "metric_source": "derived", + "derivation": "final_turn_minus_elimination_turn", + "aggregate": "final_value", + "higher_is_better": true, + "tie_break": "lowest_clan_id", + "note": "Given to the clan that survived the most turns before elimination (or the full game). In a game with no eliminations, this goes to the highest-scoring surviving clan." + } + ] +}