From a4fdd7b782c508ce8fde3ddd71dfb70195db1ab1 Mon Sep 17 00:00:00 2001 From: Natalie Date: Fri, 29 May 2026 19:46:06 -0600 Subject: [PATCH] =?UTF-8?q?feat(@projects/@magic-civilization):=20?= =?UTF-8?q?=E2=9C=A8=20add=20survival=20scorecard=20tooling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Lilith Autocommit --- .project/objectives/p1-29d-p1-survival.md | 72 ++++++- .../objectives/p1-29e-rl-divergence-mining.md | 12 ++ .project/objectives/p2-16-audio-assets.md | 32 +++ .../screenshots/audio-smoke-2026-05-29.md | 127 ++++++++++++ .../engine/tests/unit/test_audio_manager.gd | 89 +++++++++ tools/p1-survival-score.py | 187 ++++++++++++++++++ 6 files changed, 517 insertions(+), 2 deletions(-) create mode 100644 .project/screenshots/audio-smoke-2026-05-29.md create mode 100755 tools/p1-survival-score.py diff --git a/.project/objectives/p1-29d-p1-survival.md b/.project/objectives/p1-29d-p1-survival.md index a735faff..41740002 100644 --- a/.project/objectives/p1-29d-p1-survival.md +++ b/.project/objectives/p1-29d-p1-survival.md @@ -5,8 +5,13 @@ priority: p1 status: partial scope: game1 owner: warcouncil -updated_at: 2026-05-16 +updated_at: 2026-05-29 evidence: + - "RE-BASELINE 2026-05-29 apricot batch 20260529_185955 (current main e22d78fa5, builds p1-29e production patch; 10 seeds T300): P1 tier_peak now 2-6 in ALL 10 seeds — the old 'P1 stuck at tier_peak=1' symptom is GONE (confirms p1-29e F-reframe). Scored via tools/p1-survival-score.py." + - "RE-BASELINE verdict: BOTH gates fail on current main. GATE A (survival, file: >=7/10 alive-aware P1 tp>=2) = 2/10 FAIL (P1 dies in 8/10; peak-ever tp>=2 doesn't help a dead P1). GATE B (convergence, title/dispatch: P1 elim<=T100 OR stalled, 10/10) = 6/10 FAIL (misses s3/s7 elim AFTER T100; s5/s9 alive AND developing). Gates are anti-correlated on s5/s9." 
+ - "tools/p1-survival-score.py — reusable both-ways scorer for this objective family (raw per-seed table + GATE A survival + GATE B convergence). Scans full turn_stats for P1 elimination turn." + - "FINDING: p1-29e sole-city production break-out is INERT for its target population — gated own_mil>=2, but P1 mil snapshot = 0 in all 10 seeds (P1 fights via transient units between snapshots). Refiles to p1-29e." + - "RATIFIED 2026-05-29 (operator): acceptance is now a multi-gate scorecard, not a single gate. p1-29d owns D1 (convergence, 6/10), D2 (competitive survival = old single gate, 2/10), D3 (no-zombie, 0/2). Family gates F1 development-reach 10/10 ✓, F2 game-length ✓, F3 decisiveness ✓ owned by p1-29c/a/b. Stays partial: D1/D2/D3 all fail. No balance code changed this pass (D2 at 2/10 is in the 'do not iterate' band)." - "src/simulator/crates/mc-core/src/combat_balance.rs:57-94 — SoloCityGrace block added with JSON-driven defaults (1.0/0 inert when omitted)" - "public/games/age-of-dwarves/data/combat_balance.json:7-10 — canonical magnitude defense_mult=1.75, turns=80" - "src/simulator/crates/mc-combat/src/resolver.rs:264-275, 597-606 — defender_solo_city_grace_mult field on CombatParams, composes multiplicatively with last_stand, clamped to >=1.0" @@ -40,7 +45,9 @@ Likely contributors (any/all): 3. **AI tactical decisions** — P1 may be over-attacking (losing units offensively) instead of fortifying. 4. **Personality skew** — most clans bias toward "Conquest" axis early, even from a weaker position. -## Acceptance +## Acceptance (historical single-gate — SUPERSEDED by the ratified scorecard above) + +> Retained as the work record of the 2026-05-15/16 single-gate iteration. The "≥7/10 alive-aware P1 tier_peak ≥ 2" gate below is now gate **D2** in the ratified scorecard. Canonical acceptance is the multi-gate scorecard. - ✓ Diagnose which contributor(s) dominate — performed on apricot batch `20260515_215705`. 
Already-shipped `player_stats` fields (`cities_lost`, `units_lost`, `kills`, `mil`, `pop`) tell the story without new instrumentation: @@ -96,6 +103,67 @@ Top hypothesis if the apricot gate still misses 7/10 after batch runs: - Tuning bump: raise `defense_mult` from 1.75 → 2.25 and/or extend `turns` from 80 → 120. The `last_stand` cap of 3.0× is precedent for "hard but not impossible" — 2.25 keeps us well below domination-blocking territory while substantially raising P1's survival odds against the dominant clan's tier-2 army wave. - Secondary lever: in `decide_military_action`, when `me.cities.len()==1 && enemy_mil_count > own_mil_count`, also bias unit movement TOWARD `me.cities[0].hex` instead of just suppressing step 8 — actively reel scattered units home. +## Status (2026-05-29) — re-baseline on current main; BOTH gates fail → resolved into the ratified multi-gate scorecard (see Acceptance below) + +Per p1-29e's explicit recommendation ("re-baseline p1-29c/29d against current main before further patch work"), ran a clean apricot batch `20260529_185955` on current main (`e22d78fa5`, which carries the p1-29e sole-city production break-out). Scored with `tools/p1-survival-score.py`. 
+ +Per-seed (P1 = trailing slot 1): + +| seed | endT | outcome | P0 tp | P1 tp | P1 cities | P1 elimT | P1 alive | +|---|---|---|---|---|---|---|---| +| 1 | 63 | victory | 2 | 2 | 0 | 63 | no | +| 2 | 44 | victory | 2 | 2 | 0 | 44 | no | +| 3 | 153 | victory | 6 | 3 | 0 | 153 | no | +| 4 | 100 | victory | 2 | 3 | 0 | 100 | no | +| 5 | 300 | victory | 7 | 5 | 1 | — | **yes** | +| 6 | 78 | victory | 2 | 2 | 0 | 78 | no | +| 7 | 203 | victory | 7 | 5 | 0 | 203 | no | +| 8 | 65 | victory | 2 | 2 | 0 | 65 | no | +| 9 | 286 | in_progress | 10 | 6 | 1 | — | **yes** | +| 10 | 56 | victory | 2 | 2 | 0 | 56 | no | + +**Key change vs the 2026-05-16 baseline:** P1 `tier_peak` rose from `1` in all 10 seeds to `2-6` in all 10 — the old "stuck at tier 1" symptom is **resolved on current main** (pure-research drift, not the production patch — P1 still completes 0 buildings; see p1-29e). Game-length/elimination-timing dynamics are otherwise unchanged from baseline. + +**Both candidate gates FAIL, in opposite ways:** +- **GATE A — survival (this file's Acceptance bullet):** ≥7/10 alive-aware seeds with P1 `tier_peak ≥ 2`. Result **2/10 FAIL** (only s5, s9 end alive). The tier_peak symptom is gone, but P1 still loses its capital in 8/10 — a dead P1 with peak-ever tp≥2 does not satisfy an *alive-aware* gate. This matches p1-29a's "structural, territory problem" conclusion. +- **GATE B — convergence (this objective's TITLE + the dispatch):** P1 eliminated by T100 (elimination turn ≤T100) OR stalled (alive, tp≤1), in 10/10. Result **6/10 FAIL**. Misses: s3 (elim T153) and s7 (elim T203) — P1 eliminated but *after* T100 (lingering wildcard); s5 and s9 — P1 alive *and* developing to tp 5/6 (a genuine surviving contender, not "converged"). + +The two gates are **anti-correlated**: s5/s9 are the only GATE A passes and the GATE B failures. They demand opposite engineering (make P1 stronger vs. make the game converge), so the objective cannot be closed — or even correctly iterated — until the canonical gate is chosen. 
Question forwarded to the orchestrator (Clare); per-seed data scored both ways above so the decision can be made in one shot. + +**Discipline note:** under GATE A the result is 2/10, inside the objective's own "stop, do NOT iterate (0-2/10)" band — so no balance tuning was attempted. Under GATE B the result is 6/10 with a concrete 4-seed miss list, which *would* invite targeted work, but GATE B is not yet a written acceptance criterion. **No code changed in this pass** — re-baseline + measurement only. + +**Secondary finding (refiled to p1-29e):** the p1-29e sole-city production break-out is **inert** for its target population. Its gate `own_mil >= SOLE_CITY_ECON_MIN_DEFENDERS (2)` never fires because P1's `mil` snapshot is 0 in all 10 seeds (P1 fights via transient units that exist between snapshots). The lever cannot help the very player it targets until that floor is reconsidered. + +## Acceptance — multi-gate scorecard (RATIFIED 2026-05-29 by operator) + +> This scorecard is the canonical acceptance structure for p1-29d, ratified by the operator on 2026-05-29 ("it sounds like we should have many gates"). It supersedes the single-gate `## Acceptance (historical …)` section above, which is retained as the work record. p1-29d closes `done` only when its three owned gates (D1, D2, D3) all pass on a clean apricot batch. + +Per operator steer, p1-29d is not a single pass/fail — trailing-AI health is multi-dimensional, and the binary survival-vs-convergence framing hid a third mode (zombie survivors: s5/s9 end *alive but inert*, `mil=0`, never a threat). 
The family already owns most dimensions; **p1-29d uniquely owns the END-GAME FATE of the trailing AI.** Scored on batch `20260529_185955`: + +**Family-level gates (owned elsewhere; reported for context):** +| # | Dimension | Threshold | Provenance | Current | | +|---|---|---|---|---|---| +| F1 | Development reach | P1 `tier_peak ≥ 2` ever, ≥7/10 | p1-29c (done) | 10/10 | ✓ | +| F2 | Game length | median ≤ T500 | p1-29 (user 2026-04-26) | median 89 | ✓ | +| F3 | Decisiveness | victory by cap | p1-29 cycle-4 | 9/10 + 1 in_prog | ✓ | + +**p1-29d-owned gates (end-game fate of trailing AI):** +| # | Dimension | Threshold | Source | Current | | +|---|---|---|---|---|---| +| D1 | Convergence | P1 eliminated≤T100 OR stalled(alive,tp≤1), 10/10 | title + dispatch | 6/10 | ✗ | +| D2 | Competitive survival | P1 ends ALIVE with tp≥2, ≥7/10 | body Acceptance (liveness-strict) | 2/10 | ✗ | +| D3 | No-zombie | among ALIVE survivors, P1 non-inert (`mil>0` or >1 city) | ratified 2026-05-29 (this re-baseline) | 0/2 survivors healthy | ✗ | + +**The 4 non-converged seeds split into two distinct pathologies:** +- **Late elimination (s3 @T153, s7 @T203):** P1 lingers as a wildcard well past T100 before dying — fails D1. +- **Zombie tail (s5 @T300, s9 @T286):** P1 alive, researched to tp 5/6, but `mil=0`, 1 city, `kills=0` — an ignored bystander, not a competitor. Passes D2's letter (alive+developed) but fails D3 (inert). These are the *only* D2 "successes," and they are not healthy survivals. + +**Net trailing-AI fate on current main:** 6/10 clean fast convergence, 2/10 late elimination, 2/10 inert zombie tail, **0/10 genuine competitive survival.** The headline win from this re-baseline is F1 (development reach now 10/10 — the old "stuck at tier 1" symptom is fixed). The remaining unhealth is concentrated in 4 seeds with two named, distinct causes. 
+ +> Thresholds for D1/D2 are lifted verbatim from existing sources; D3 and its threshold were newly minted in this re-baseline and ratified alongside D1/D2 on 2026-05-29. + +**Next worker — direction (no gate targeted yet; operator ratified the panel but did not pick a column to drive):** the two pathologies are independent and have distinct levers. Late elimination (D1, s3/s7) is a *combat/pacing* problem — P1 dies after lingering, so the lever is either faster resolution or a turn-floored convergence. Zombie tail (D3, s5/s9) is an *AI-behaviour* problem — P1 survives but builds no military (`mil=0`) and never contests; the lever is making an isolated, un-threatened sole-city AI either expand/militarise or get found-and-finished by P0. Note the p1-29e production break-out is **inert** here (`own_mil>=2` floor never fires; P1 `mil`=0 in 10/10) — reconsider that floor before relying on it. Per the objective's discipline, D2 at 2/10 is in the "do not iterate" band; do not tune balance code until a specific gate is chosen as the target. + ## Why this exists separately from p1-29c p1-29c's spec is "raise priority of Settle/Defend/Research when sole-city threatened." That work landed and is correct. The empirical failure mode is "P1 doesn't survive long enough to ACT on those priorities." That's a different code surface and a different design question — it deserves its own objective. diff --git a/.project/objectives/p1-29e-rl-divergence-mining.md b/.project/objectives/p1-29e-rl-divergence-mining.md index 42e5cfac..d5ee0a04 100644 --- a/.project/objectives/p1-29e-rl-divergence-mining.md +++ b/.project/objectives/p1-29e-rl-divergence-mining.md @@ -159,6 +159,18 @@ batch artifacts). Either way the patch completed 0 buildings, so it had no observable effect — and the `own_mil>=2` floor may be exactly wrong for the weakest player. 
+### CONFIRMED 2026-05-29 (p1-29d re-baseline): the break-out is inert + +The earlier "whether the gate ever fired is unconfirmed" is now resolved. +Apricot batch `20260529_185955` (fresh current-main build, 10 seeds T300, +scored by `tools/p1-survival-score.py`): P1 `mil` snapshot = **0 in 10/10 +seeds**. The break-out's `own_mil >= SOLE_CITY_ECON_MIN_DEFENDERS (2)` floor +therefore **never fires** for the trailing AI — the lever is inert for the +exact population it targets. **Action for next iteration:** the `own_mil>=2` +floor is wrong for the weakest player; either lower it to 1/0 for a +sole-city AI, or gate on "has produced ≥1 defender this game" rather than a +live snapshot count, so the economy interject can actually trigger. + ### Honest status & next steps - **Gate: NOT MET.** No metric movement attributable to this patch. diff --git a/.project/objectives/p2-16-audio-assets.md b/.project/objectives/p2-16-audio-assets.md index 1f5bf6d4..4ea0081a 100644 --- a/.project/objectives/p2-16-audio-assets.md +++ b/.project/objectives/p2-16-audio-assets.md @@ -153,3 +153,35 @@ One acceptance bullet remains substantively un-met. The other ✗ bullets in the ### 2. Acceptance-list audit (housekeeping) - All ✗ bullets except "Live audible smoke" are functionally satisfied per the verification block. Re-audit each against the relocated `public/resources/audio/` tree and flip to ✓ in a single pass when the smoke lands. Do NOT flip any ✗ to ✓ ahead of the smoke — Rail-objective-integrity requires cited evidence per bullet. 
+ +## 2026-05-29 progress (remote-control p2-16 driver) + +The "Live audible smoke" bullet splits into two claims; the **mechanical half +is now closed headless** and the **perceptual half is teed up for a human**: + +- **"routes via the correct mixer bus" — DONE (headless, durable).** Three new + cases in `src/game/engine/tests/unit/test_audio_manager.gd`, verified passing + on apricot (flatpak Godot 4.6.3, `--headless`, GUT, 2026-05-29): + - `test_every_sfx_event_routes_to_sfx_bus` — 75/75 SFX entries declare `bus: SFX`. + - `test_every_music_track_routes_to_music_bus` — 22/22 music tracks declare `bus: Music`. + - `test_play_stream_applies_manifest_bus_to_player` — synthetic in-memory + stream proves `_play_stream` (audio_manager.gd:304) writes the entry's + declared bus onto the pool player (control `Music` + default `SFX`), not a + hardcoded constant. No real `.ogg` import needed — sidesteps the reason the + older wiring tests punted on playback. +- **"every event audibly triggers" — PENDING HUMAN.** Terminates in human + hearing; cannot be self-certified by an autonomous/headless agent. Prepared + checklist (17 EventBus events, expected key+bus pulled from the live + `library.json` manifest, `heard?` left blank): `.project/screenshots/audio-smoke-2026-05-29.md`. +- **Pipeline re-confirmed clean (2026-05-29):** `audio-validate.py` → OK; + `audio-licenses-render.py --check` → in sync (119 rows), allowlist gate intact. +- **Stale-path correction:** this spec + `.project/audio-status.md` reference + `public/games/age-of-dwarves/data/audio.json`, which does not exist. The live + manifest is `public/resources/audio/library.json` (via the `data/audio/manifest.json` + indirection), loaded at `audio_manager.gd:21`. Audit pass should fix these paths. + +**Status stays `in_progress`** — the perceptual gate is irreducibly human (a +person must listen on a host with audio out; note the standing `godot --import` +kernel-panic constraint on plum). 
Close-out = the human fills the smoke +checklist (ideally with the linked screen-recording) → then flip the +functionally-satisfied ✗ bullets to ✓ in one audited pass. diff --git a/.project/screenshots/audio-smoke-2026-05-29.md b/.project/screenshots/audio-smoke-2026-05-29.md new file mode 100644 index 00000000..7bea429b --- /dev/null +++ b/.project/screenshots/audio-smoke-2026-05-29.md @@ -0,0 +1,127 @@ +# Audio audible smoke — p2-16 acceptance bullet 9 + +**Status:** ⏳ **PENDING HUMAN CAPTURE.** The perceptual half of this bullet +("every event *audibly* triggers") cannot be self-certified by an autonomous +or headless agent — it terminates in human hearing. This file is the prepared +checklist; the `heard?` column is left **blank on purpose** and must be filled +by a person listening on a machine with a real audio device. + +**Mechanical half — already proven headless (do not re-do):** "routes via the +correct mixer bus" is discharged by `test_audio_manager.gd` on apricot +(GUT, flatpak Godot 4.6.3, `--headless`), 2026-05-29: + +| Test | Result | +|---|---| +| `test_every_sfx_event_routes_to_sfx_bus` | ✓ 75/75 SFX entries declare `bus: SFX` | +| `test_every_music_track_routes_to_music_bus` | ✓ 22/22 music tracks declare `bus: Music` | +| `test_play_stream_applies_manifest_bus_to_player` | ✓ `_play_stream` writes the entry's declared bus onto the pool player (control bus `Music` + default `SFX`) | + +So a listener only needs to confirm sound is **audible and on the right +bus** (Master/Music/SFX sliders move it as expected) — the key→bus mapping +itself is machine-verified. + +--- + +## Source of truth + +- **Manifest:** `public/games/age-of-dwarves/data/audio/manifest.json` + (`{source: "resources/audio", includes: true}`) → resolves the cross-theme + library at **`public/resources/audio/library.json`** (schema_version 2, + `sfx{}` + `music.tracks[]`). Loaded at runtime by + `audio_manager.gd:21` (`AUDIO_LIBRARY_PATH`). 
+ > **Stale-path note for the objective audit:** the p2-16 spec and + > `.project/audio-status.md` both reference + > `public/games/age-of-dwarves/data/audio.json`. That file does **not** + > exist; the live manifest is `library.json` at the path above (relocated + > per the post-p1-40 single-source-of-truth data architecture). Expected + > keys below are pulled from the live manifest, not the stale path. +- **Bus → slider mapping:** `settings_manager.gd` — `master_volume`→Master, + `music_volume`→Music, `sfx_volume`→SFX. Buses are created at runtime by + `SettingsManager._ensure_audio_buses()` (no `.tres` layout). +- This checklist references manifest **keys**, never `.ogg` file paths — the + key→file resolution is the manifest's job (SSoT). If an event below fires + no sound and its key IS in `library.json`, that is an asset/import bug; + if the key is absent, that is a manifest bug — fix the manifest, never + paper over it here. + +## How to run — and the open host question + +**The blocker is partly a capability question, not just effort. Resolve this first:** + +- **plum** is the only machine with audio out, but it carries the standing + `godot --import` kernel-panic flag (memory `feedback_no_godot_import_on_plum`). + That flag is **import-specific** — a *desktop play session on an + already-warm tree* (import cache built elsewhere, e.g. via + `scripts/godot-docker.sh --headless --import` which runs import in an + isolated Linux VM) plausibly does **not** trip the GPU/video kext that + panicked. If that holds, this is a 10-minute "someone listen" task on plum. +- **If no machine can safely run the live game with audio out**, the bullet is + blocked on *capability*, not effort — escalate that distinction. 
The + documented fallback then is **Godot Movie Maker mode on apricot**: + `godot --write-movie out.avi` renders audio deterministically to a file + **without an audio device**, so a small proof scene that fires the 17 + EventBus events in sequence produces a listenable AVI the human plays back + once. (Fallback only — do NOT build the proof scene speculatively; build it + solely if the host question above answers "no audio-capable host exists".) + +Steps for the desktop-listen path: + +1. On the audio-capable host, ensure the import cache is warm (built off-plum + if using plum, per above). +2. Boot the live game (`./run dev` / desktop Godot, **not** the design app, + **not** GUT). Reach `world_map` with at least one human + one AI player so + the perspective-aware branches (own vs rival, victory vs defeat) can fire. +3. Trigger each event in the table (gameplay action, or fire the EventBus + signal from the debugger). Tick `heard?`, confirm the bus, and confirm the + matching volume slider attenuates it. +4. **Durable proof:** capture a short screen-recording with audio at + `.project/screenshots/audio-smoke-2026-05-29.mp4` and link it here. Audio + in the recording is the verifiable artifact; the ticked table alone is not. + +## Checklist — 17 EventBus events + +`heard?` / `bus ok?` / `slider ok?` / `notes` are for the human. Expected +key/bus come from the live manifest. Branching handlers list the candidate +keys; which one fires depends on perspective (own/human vs rival/AI). + +| # | EventBus signal | Expected manifest key(s) | Bus | heard? | bus ok? | slider ok? 
| notes | +|---|---|---|---|---|---|---|---| +| 1 | `turn_started` | `turn_started` | SFX | ☐ | ☐ | ☐ | | +| 2 | `turn_ended` | `turn_ended` | SFX | ☐ | ☐ | ☐ | | +| 3 | `city_founded` | `city_founded` | SFX | ☐ | ☐ | ☐ | | +| 4 | `city_grew` | `city_grew` | SFX | ☐ | ☐ | ☐ | | +| 5 | `city_starved` | `city_starved` | SFX | ☐ | ☐ | ☐ | | +| 6 | `tech_researched` | `tech_researched` | SFX | ☐ | ☐ | ☐ | | +| 7 | `culture_researched` | `culture_researched` | SFX | ☐ | ☐ | ☐ | | +| 8 | `combat_started` | `unit.<class>.attack` → `combat_started` | SFX | ☐ | ☐ | ☐ | categorical per attacker class | +| 9 | `combat_resolved` | `unit.<class>.hit` → `combat_hit` | SFX | ☐ | ☐ | ☐ | categorical per defender class | +| 10 | `unit_destroyed` | `unit.<species>.death` + `unit_defeated` / `unit_victorious` / `unit_killed` | SFX | ☐ | ☐ | ☐ | species death + perspective sting (human victim / human killer / neither) | +| 11 | `wonder_built` | `wonder_built.own` / `wonder_built.rival` / `wonder_built` | SFX | ☐ | ☐ | ☐ | own=fanfare, rival=distant bell | +| 12 | `era_changed` | `era_advanced` (SFX) + era ambient track (Music) | SFX+Music | ☐ | ☐ | ☐ | era track crossfades over 2 s | +| 13 | `golden_age_started` | `golden_age_swell` (SFX) + `golden_age` (Music) | SFX+Music | ☐ | ☐ | ☐ | on `golden_age_ended`, music stops — verify era track resumes next era | +| 14 | `victory_achieved` (you win) | `victory_fanfare` (SFX) + victory pool (Music) | SFX+Music | ☐ | ☐ | ☐ | pool keyed by victory_type | +| 14b | `victory_achieved` (AI wins) | `defeat_stinger` (SFX) + defeat pool (Music) | SFX+Music | ☐ | ☐ | ☐ | you're being beaten — `defeat-by-<victory_type>` track | +| 15 | `player_eliminated` (you) | `defeat_stinger` (SFX) + `defeat` (Music) | SFX+Music | ☐ | ☐ | ☐ | | +| 16 | `wild_creature_spawned` | `fauna.<class>.spawn` → `wild_spawn` | SFX | ☐ | ☐ | ☐ | categorical per creature class | +| 17 | `weather_event_applied` | `weather.<kind>` (e.g. 
`weather.storm`) | SFX | ☐ | ☐ | ☐ | kind ∈ storm/blizzard/heat_wave/drought/hurricane/tornado | + +### Minimal acceptance path (the spec's short list) +The original bullet calls out a quick path — these are rows 1/2/3/9·10/6/16: +**main_menu UI clicks → found a city → end turn → fight a wild creature → +research a tech.** Filling those five plus confirming slider attenuation is +the acceptance floor; the full 17 above is the thorough pass. + +### Global mixer check (do once) +| Check | ok? | notes | +|---|---|---| +| Master slider → 0 silences everything | ☐ | | +| Music slider → 0 silences music, SFX still audible | ☐ | | +| SFX slider → 0 silences SFX, music still audible | ☐ | | + +--- + +## Sign-off + +- Captured by: ________ Date: ________ Host: ________ +- Recording: `.project/screenshots/audio-smoke-2026-05-29.mp4` (link when captured) +- Result: ☐ all events audible & correctly routed → p2-16 acceptance bullet 9 met diff --git a/src/game/engine/tests/unit/test_audio_manager.gd b/src/game/engine/tests/unit/test_audio_manager.gd index f5b6c2ac..7530a506 100644 --- a/src/game/engine/tests/unit/test_audio_manager.gd +++ b/src/game/engine/tests/unit/test_audio_manager.gd @@ -319,3 +319,92 @@ func test_unknown_entity_chain_does_not_resolve() -> void: AudioManager._sfx_events.has(k), "resolver leaked a fallback for unknown entity: %s" % k ) + + +# ── p2-16 — mixer-bus routing closure ─────────────────────────────────────── +# The p2-16 "live audible smoke" acceptance bullet makes two distinct claims: +# (a) every event audibly triggers — perceptual, human-only, cannot be +# self-certified by a headless test, and +# (b) routes via the correct mixer bus — mechanical, assertable headless. +# These tests discharge (b) so the human smoke checklist only has to cover (a). +# +# The chain that must hold for (b) is: +# handler fires → resolves a manifest key → entry declares the right bus +# → _play_stream applies that bus to the AudioStreamPlayer. 
+# The connection + resolution links are already covered above +# (test_all_required_signals_are_connected, test_simple_routes_have_manifest_entries, +# the chain-closure tests). The two links these add are the manifest-bus +# declaration (total closure, below) and the playback application (synthetic +# stream, below). Real .ogg files do not load in a `--headless` run without an +# import pass, which is exactly why the older wiring tests punted on playback; +# a synthetic in-memory AudioStreamWAV sidesteps that and lets us assert the +# resulting `player.bus` for real. + + +func test_every_sfx_event_routes_to_sfx_bus() -> void: + # Total closure: whatever key any of the smoke events resolves to, every + # SFX manifest entry must declare the SFX bus. A typo'd `bus` string here + # is the realistic failure mode for "wrong mixer bus", and this catches it + # without needing to enumerate the 17 events individually. + assert_gt(AudioManager._sfx_events.size(), 0, "manifest must expose SFX events") + for key: String in AudioManager._sfx_events.keys(): + var entry: Dictionary = AudioManager._sfx_events[key] + assert_eq( + String(entry.get("bus", "SFX")), + "SFX", + "SFX event '%s' must route to the SFX bus" % key + ) + + +func test_every_music_track_routes_to_music_bus() -> void: + assert_gt(AudioManager._music_tracks.size(), 0, "manifest must expose music tracks") + for track_id: String in AudioManager._music_tracks.keys(): + var entry: Dictionary = AudioManager._music_tracks[track_id] + assert_eq( + String(entry.get("bus", "Music")), + "Music", + "music track '%s' must route to the Music bus" % track_id + ) + + +func test_play_stream_applies_manifest_bus_to_player() -> void: + # Proves the last link: `_play_stream` writes the entry's declared `bus` + # onto the pool player (audio_manager.gd:304) rather than hardcoding a + # constant. 
We feed a synthetic in-memory stream (no file, no import) and a + # control entry whose bus deliberately differs from the SFX default, then + # assert the player that received the stream carries the declared bus. + var stream: AudioStreamWAV = _make_silent_wav() + # Control bus chosen to differ from the "SFX" default so a hardcoded + # `player.bus = "SFX"` regression would fail this assertion. + var entry: Dictionary = {"bus": "Music", "volume_db": -12.0} + AudioManager._play_stream(stream, entry) + var landed: AudioStreamPlayer = _pool_player_for(stream) + assert_not_null(landed, "synthetic stream must land on a pool player") + if landed != null: + assert_eq(landed.bus, &"Music", "_play_stream must apply the entry's declared bus") + # And the SFX default path: an entry with no explicit bus falls back to SFX. + var stream2: AudioStreamWAV = _make_silent_wav() + AudioManager._play_stream(stream2, {"volume_db": -6.0}) + var landed2: AudioStreamPlayer = _pool_player_for(stream2) + assert_not_null(landed2, "second synthetic stream must land on a pool player") + if landed2 != null: + assert_eq(landed2.bus, &"SFX", "_play_stream must default to the SFX bus") + + +func _make_silent_wav() -> AudioStreamWAV: + # 0.1 s of 16-bit mono silence at 44.1 kHz — enough for play() to accept + # the stream on the dummy audio driver without touching the filesystem. 
+ var wav: AudioStreamWAV = AudioStreamWAV.new() + wav.format = AudioStreamWAV.FORMAT_16_BITS + wav.mix_rate = 44100 + wav.stereo = false + wav.data = PackedByteArray() + wav.data.resize(int(44100 * 0.1) * 2) # 16-bit -> 2 bytes/sample + return wav + + +func _pool_player_for(stream: AudioStream) -> AudioStreamPlayer: + for player: AudioStreamPlayer in AudioManager._sfx_pool: + if player.stream == stream: + return player + return null diff --git a/tools/p1-survival-score.py b/tools/p1-survival-score.py new file mode 100755 index 00000000..7e6f0d9a --- /dev/null +++ b/tools/p1-survival-score.py @@ -0,0 +1,187 @@ +#!/usr/bin/env python3 +"""p1-survival-score.py — score a 10-seed autoplay batch for objective p1-29d. + +p1-29d carries two contradictory gate definitions (see the objective file): + + SURVIVAL (body Acceptance bullet): >=7/10 *alive-aware* seeds where the + trailing AI (P1, slot 1) ends alive with tier_peak >= 2. + CONVERGENCE (title + dispatch): the trailing AI is eliminated OR stalled + *before T100* in 10/10 seeds (stable late-game pacing). + +Rather than pick, this scorer prints the raw per-seed table and BOTH gate +verdicts as derived views, so a human/orchestrator decides which is canonical. + +The convergence gate needs P1's *elimination turn*, not the game-end turn — +these are multi-clan domination games, so P1 can die well before the winner +achieves domination. We therefore scan the full turn_stats stream for the +first turn P1's city count hits 0 (and stays 0), not just the final line. + +Usage: + tools/p1-survival-score.py # dir containing game_*_seedN/ + +stdlib only. 
+""" +from __future__ import annotations + +import json +import re +import sys +from pathlib import Path + +TRAILING_SLOT = "1" # P1 = slot 1 is the designated trailing AI per p1-29d +LEADER_SLOT = "0" +T100 = 100 + + +def _player(stats: dict, slot: str) -> dict | None: + """player_stats is an object keyed by player-index string.""" + if not isinstance(stats, dict): + return None + return stats.get(slot) + + +def scan_seed(game_dir: Path) -> dict | None: + ts = game_dir / "turn_stats.jsonl" + if not ts.exists(): + return None + lines = [ln for ln in ts.read_text().splitlines() if ln.strip()] + if not lines: + return None + + m = re.search(r"seed(\d+)", game_dir.name) + seed = int(m.group(1)) if m else -1 + + last = json.loads(lines[-1]) + pstats = last.get("player_stats", {}) + p1 = _player(pstats, TRAILING_SLOT) or {} + p0 = _player(pstats, LEADER_SLOT) or {} + + final_turn = last.get("turn") + outcome = last.get("outcome") + victory_type = last.get("victory_type") + winner_index = last.get("winner_index") + + p1_tp = p1.get("tier_peak", 0) or 0 + p0_tp = p0.get("tier_peak", 0) or 0 + p1_cities_end = p1.get("cities", 0) or 0 + p1_lost = p1.get("cities_lost", 0) or 0 + p1_alive = p1_cities_end >= 1 + + # P1 elimination turn: first turn its cities hit 0 and never recover. 
+ p1_elim_turn = None + for ln in lines: + try: + rec = json.loads(ln) + except json.JSONDecodeError: + continue + p1t = _player(rec.get("player_stats", {}), TRAILING_SLOT) + if p1t is None: + continue + turn = rec.get("turn") + cities = p1t.get("cities", 0) or 0 + if cities == 0 and not p1t.get("cities_founded_pending"): + # candidate elimination; require it to be terminal (stays 0) + if p1_elim_turn is None: + p1_elim_turn = turn + else: + p1_elim_turn = None # recovered → reset + # If alive at end, no elimination + if p1_alive: + p1_elim_turn = None + + return { + "seed": seed, + "final_turn": final_turn, + "outcome": outcome, + "victory_type": victory_type, + "winner_index": winner_index, + "p0_tp": p0_tp, + "p1_tp": p1_tp, + "p1_cities_end": p1_cities_end, + "p1_cities_lost": p1_lost, + "p1_alive": p1_alive, + "p1_elim_turn": p1_elim_turn, + "p1_mil": p1.get("mil", 0), + "p1_kills": p1.get("kills", 0), + "p1_units_lost": p1.get("units_lost", 0), + "p1_pop": p1.get("pop", 0), + } + + +def main(argv: list[str]) -> int: + if len(argv) != 2: + print(__doc__) + return 2 + root = Path(argv[1]) + if not root.exists(): + print(f"no such dir: {root}", file=sys.stderr) + return 2 + + def _seed_of(d: Path) -> int: + m = re.search(r"seed(\d+)", d.name) + return int(m.group(1)) if m else -1 + + game_dirs = sorted( + {p.parent for p in root.rglob("turn_stats.jsonl")}, key=_seed_of) + rows = [r for d in game_dirs if (r := scan_seed(d))] + if not rows: + print(f"no parseable seeds under {root}", file=sys.stderr) + return 1 + rows.sort(key=lambda r: r["seed"]) + + # ── Raw per-seed table ──────────────────────────────────────────── + print(f"\n=== p1-29d batch scoring: {root} ===") + print(f"{len(rows)} seeds\n") + hdr = ("seed", "endT", "outcome", "P0tp", "P1tp", "P1cit", "P1lost", + "P1alive", "P1elimT", "P1mil", "P1kills") + print("{:>4} {:>5} {:>11} {:>4} {:>4} {:>5} {:>6} {:>7} {:>7} {:>5} {:>6}".format(*hdr)) + for r in rows: + print("{:>4} {:>5} {:>11} {:>4} {:>4} {:>5} 
{:>6} {:>7} {:>7} {:>5} {:>6}".format( + r["seed"], str(r["final_turn"]), str(r["outcome"])[:11], + r["p0_tp"], r["p1_tp"], r["p1_cities_end"], r["p1_cities_lost"], + "yes" if r["p1_alive"] else "no", + str(r["p1_elim_turn"]) if r["p1_elim_turn"] is not None else "-", + r["p1_mil"], r["p1_kills"])) + + n = len(rows) + + # ── Gate A: SURVIVAL (file Acceptance) ──────────────────────────── + # alive-aware seed with P1 tier_peak >= 2 (P1 ends alive AND developed) + survival_pass = [r for r in rows if r["p1_alive"] and r["p1_tp"] >= 2] + # also report the stricter both-developed alive-aware count (p1-29a def) + both_dev = [r for r in rows if r["p0_tp"] >= 2 and r["p1_tp"] >= 2 and r["p1_alive"]] + print(f"\n--- GATE A (SURVIVAL, file): P1 alive AND tier_peak>=2 ---") + print(f" {len(survival_pass)}/{n} seeds (need >=7) -> " + f"{'PASS' if len(survival_pass) >= 7 else 'FAIL'}") + print(f" (stricter both-developed alive-aware: {len(both_dev)}/{n})") + print(f" seeds passing: {[r['seed'] for r in survival_pass]}") + + # ── Gate B: CONVERGENCE (title/dispatch) ────────────────────────── + # P1 eliminated before T100 OR stalled (alive but never developed past tp1) + conv_rows = [] + for r in rows: + elim_before_t100 = (not r["p1_alive"]) and ( + r["p1_elim_turn"] is not None and r["p1_elim_turn"] <= T100) + # fallback: if elim turn unknown but dead and game ended by T100 + if (not r["p1_alive"]) and r["p1_elim_turn"] is None \ + and isinstance(r["final_turn"], int) and r["final_turn"] <= T100: + elim_before_t100 = True + stalled = r["p1_alive"] and r["p1_tp"] <= 1 + converged = elim_before_t100 or stalled + reason = ("elim<=T100" if elim_before_t100 + else "stalled(alive,tp<=1)" if stalled + else ("elim>T100" if not r["p1_alive"] else "alive,developing")) + conv_rows.append((r["seed"], converged, reason)) + conv_pass = [c for c in conv_rows if c[1]] + print(f"\n--- GATE B (CONVERGENCE, title/dispatch): " + f"P1 eliminated<=T100 OR stalled, 10/10 ---") + print(f" 
{len(conv_pass)}/{n} seeds (need {n}/{n}) -> " + f"{'PASS' if len(conv_pass) == n else 'FAIL'}") + for seed, ok, reason in conv_rows: + print(f" s{seed}: {'OK ' if ok else 'NO '} ({reason})") + + return 0 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv))