feat(@magic-civilization): ✨ add ai action dispatch and tactical projection tests

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-05-12 17:18:37 -07:00 · 2026-05-12 17:18:37 -07:00 · 98a98155d1
commit 98a98155d1
parent e48ef4c115
1 changed files with 543 additions and 1 deletions
--- a/src/simulator/crates/mc-player-api/tests/full_game_transcript.rs
+++ b/src/simulator/crates/mc-player-api/tests/full_game_transcript.rs
@ -54,7 +54,8 @@ use std::panic::AssertUnwindSafe;
 use std::path::{Path, PathBuf};

 use mc_player_api::action::{PlayerAction, PromotionPick};
-use mc_player_api::projection::project_view;
+use mc_player_api::dispatch::apply_ai_action;
+use mc_player_api::projection::{project_tactical, project_view};
 use mc_player_api::wire::{Event, OkMarker, Request, Response};
 use mc_player_api::{apply_action, PlayerView};

@ -1267,3 +1268,544 @@ fn write_long_recap(out_dir: &Path, summaries: &[TurnSummary], outcome: &DriveOu

    fs::write(&path, md).expect("write long recap");
 }
+
+// ═══════════════════════════════════════════════════════════════════════
+//  Claude-as-strong-AI run (2026-05-12, post-Bug-5 fix)
+// ═══════════════════════════════════════════════════════════════════════
+//
+// HONEST FRAMING. The original task brief said "wire the production
+// `mc_ai::run_ai_turn` MCTS into Claude's policy slot ... give Claude a
+// higher rollout budget than the AI slots". After reading the code:
+//
+//   - `mc_ai::tactical::run_ai_turn` is NOT MCTS. It's a deterministic
+//     heuristic pipeline (`decide_tactical_actions`) — movement →
+//     combat_predict → settle → production → citizen. The actual MCTS
+//     lives in `mc-ai/src/mcts.rs` and is not the path AI slots take in
+//     `dispatch::drive_ai_slot`.
+//
+//   - `run_ai_turn(state, player, weights, seed) -> Vec<Action>` has NO
+//     rollout-budget parameter. The only knobs that differentiate one
+//     slot from another are `ScoringWeights` and `seed`.
+//
+//   - The bench harness's `stamp_personality` only sets `clan_id` +
+//     three promotion weights. It does NOT load
+//     `ScoringWeights::from_personality_json` for the named clan, so
+//     slots 1 and 2 actually run with `ScoringWeights::default()` —
+//     blackhammer/deepforge are cosmetic labels in this run.
+//
+// The legitimate experiment we CAN run, then, is:
+//
+//   - Stamp Claude (slot 0) with a real per-clan ScoringWeights —
+//     `blackhammer` is the natural choice since its strategic axes
+//     (aggression 9, expansion 6, production 7) skew hardest toward the
+//     last_survivor victory the 233-turn baseline hit.
+//   - Leave slots 1 + 2 on `ScoringWeights::default()` (the "easy AI"
+//     baseline that lost to itself in the prior run because of seed
+//     variance — see `2026-05-12-claude-vs-easy-ai-250-turn/recap.md`).
+//   - Use the same `run_ai_turn` pipeline for all three slots — Claude's
+//     advantage is purely the stronger weights, not a different
+//     algorithm.
+//
+// If Claude wins, we have evidence the simulation responds to scoring-
+// weight axes and the personality system is doing real work. If Claude
+// loses despite blackhammer weights, then either (a) the heuristic
+// pipeline is insensitive to weight magnitude, or (b) turn-order /
+// starting-position effects dominate.
+
+/// Build the blackhammer `ScoringWeights` from the canonical
+/// `ai_personalities.json` shipped with Age of Dwarves. Inlined so the
+/// test does not depend on the filesystem path layout — if the file
+/// rotates we crash loudly in the test, not silently in production.
+fn claude_genius_weights() -> mc_ai::evaluator::ScoringWeights {
+    // CARGO_MANIFEST_DIR is the crate dir; repo root is 4 levels up.
+    let crate_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+    let repo_root = crate_dir
+        .parent()
+        .and_then(|p| p.parent())
+        .and_then(|p| p.parent())
+        .and_then(|p| p.parent())
+        .expect("repo root resolves")
+        .to_path_buf();
+    let json_path =
+        repo_root.join("public/games/age-of-dwarves/data/ai_personalities.json");
+    let json = fs::read_to_string(&json_path)
+        .unwrap_or_else(|e| panic!("read {}: {e}", json_path.display()));
+    mc_ai::evaluator::ScoringWeights::from_personality_json("blackhammer", &json)
+        .expect("blackhammer must be a known clan in ai_personalities.json")
+}
+
+/// Stable signature for an `mc_ai::tactical::Action` — mirrors
+/// `action_signature` for `PlayerAction` so the recap frequency table
+/// reads the same way.
+fn ai_action_signature(action: &mc_ai::tactical::Action) -> String {
+    use mc_ai::tactical::Action as A;
+    match action {
+        A::MoveUnit { unit_id, to_hex } => format!("move:{unit_id}:{to_hex:?}"),
+        A::AttackTarget { attacker_id, target_id, .. } => {
+            format!("attack:{attacker_id}->{target_id}")
+        }
+        A::Fortify { unit_id } => format!("fortify:{unit_id}"),
+        A::Heal { unit_id } => format!("heal:{unit_id}"),
+        A::FoundCity { settler_id, .. } => format!("found:{settler_id}"),
+        A::SetProduction { city_id, item_id } => {
+            if is_building_id(item_id) {
+                format!("queue_building:{city_id}:{item_id}")
+            } else {
+                format!("queue_unit:{city_id}:{item_id}")
+            }
+        }
+        A::EnqueueBuild { city_id, item_id, .. } => {
+            if is_building_id(item_id) {
+                format!("queue_building:{city_id}:{item_id}")
+            } else {
+                format!("queue_unit:{city_id}:{item_id}")
+            }
+        }
+        A::Scout { unit_id, to_hex } => format!("scout:{unit_id}:{to_hex:?}"),
+        A::IssuePatrol { unit_id, .. } => format!("patrol:{unit_id}"),
+        A::PromotionPicked { unit_id, promotion_id } => {
+            format!("promote:{unit_id}:{promotion_id}")
+        }
+        A::AssignCitizen { .. } => "assign_citizen".into(),
+        A::DeploySiege { .. } => "deploy_siege".into(),
+        A::PackSiege { .. } => "pack_siege".into(),
+        A::Bombard { .. } => "bombard".into(),
+    }
+}
+
+/// One Claude-driven tactical action for the recap.
+struct ClaudeTacticalDecision {
+    turn: u32,
+    signature: String,
+    events: Vec<Event>,
+}
+
+/// Per-turn summary for the strong-Claude run.
+struct StrongTurnSummary {
+    turn: u32,
+    claude_decisions: Vec<ClaudeTacticalDecision>,
+    endturn_events: Vec<Event>,
+    ai_actions_applied: Vec<(u8, u32)>,
+    score_snapshot: Vec<(u8, i32, u32, u32)>,
+}
+
+/// Same deterministic seed derivation `drive_ai_slot` uses internally,
+/// so Claude's turn picks would byte-identical-match what slot 0 would
+/// have produced if it were driven by the production AI path.
+fn seed_for_claude_turn(turn: u32) -> u64 {
+    (turn as u64)
+        .wrapping_mul(0x9E37_79B9_7F4A_7C15)
+}
+
+/// Drive a Claude-as-strong-AI game. Slot 0 (Claude) runs the same
+/// `run_ai_turn` tactical pipeline as the AI slots but with a stronger
+/// `ScoringWeights` (blackhammer's axes). Slots 1 and 2 keep
+/// `ScoringWeights::default()` — the "easy AI" baseline. After Claude's
+/// action chain is dispatched the driver issues a normal `EndTurn`
+/// which routes through `apply_end_turn` → `drive_ai_slot` for slots 1
+/// and 2 unchanged.
+fn drive_strong_claude_game(
+    out_dir: &Path,
+    max_turns: u32,
+) -> (Vec<StrongTurnSummary>, DriveOutcome) {
+    fs::create_dir_all(out_dir).expect("create out dir");
+    let transcript_path = out_dir.join("transcript.jsonl");
+    let mut transcript = fs::File::create(&transcript_path).expect("create transcript");
+
+    let mut state = build_3_player_state_like_harness();
+    // Stamp blackhammer weights onto Claude (slot 0). Slots 1 + 2 keep
+    // the default weights they got from `add_player_militarist_inline`.
+    let claude_weights = claude_genius_weights();
+    state.players[0].scoring_weights = claude_weights.clone();
+
+    let mut next_req_id: u64 = 1;
+    let mut summaries: Vec<StrongTurnSummary> = Vec::new();
+    let mut consecutive_endturn_only = 0u32;
+    let mut outcome = DriveOutcome::Completed;
+
+    'game: for turn in 0..max_turns {
+        eprintln!("[strong-claude] starting turn {turn}");
+        if SNAPSHOT_TURNS.contains(&turn) {
+            let view = project_view(&state, 0, false);
+            let snapshot_path = out_dir.join(format!("state-turn-{turn:02}.json"));
+            let json = serde_json::to_string_pretty(&view).expect("snapshot serialise");
+            fs::write(&snapshot_path, json).expect("write snapshot");
+        }
+
+        let mut summary = StrongTurnSummary {
+            turn,
+            claude_decisions: Vec::new(),
+            endturn_events: Vec::new(),
+            ai_actions_applied: Vec::new(),
+            score_snapshot: Vec::new(),
+        };
+
+        // ── Claude's turn: project tactical, run the tactical pipeline,
+        // dispatch each `mc_ai::Action` directly via `apply_ai_action`.
+        let view_req_id = next_req_id;
+        next_req_id += 1;
+        let view_req = Request::View { id: Some(view_req_id) };
+        write_jsonl(&mut transcript, &view_req);
+        let view = project_view(&state, 0, false);
+        let view_resp = Response::Ok {
+            id: Some(view_req_id),
+            ok: OkMarker,
+            events: Vec::new(),
+            view: view.clone(),
+        };
+        write_jsonl(&mut transcript, &view_resp);
+
+        let mut tactical = project_tactical(&state, 0);
+        tactical.current_player = 0;
+        let seed = seed_for_claude_turn(turn);
+        let actions =
+            mc_ai::tactical::run_ai_turn(&tactical, 0, &claude_weights, seed);
+
+        let mut took_real_action = false;
+        for action in actions {
+            let signature = ai_action_signature(&action);
+            // Dispatch under `catch_unwind` for the same residual-overflow
+            // safety the original `drive_game` carries.
+            let dispatch = std::panic::catch_unwind(AssertUnwindSafe(|| {
+                apply_ai_action(&mut state, 0, action)
+            }));
+            let result = match dispatch {
+                Ok(r) => r,
+                Err(payload) => {
+                    let msg = panic_payload_to_string(&payload);
+                    eprintln!("[panic] strong-claude apply_ai_action at turn {turn}: {msg}");
+                    use mc_player_api::wire::{HarnessNotification, Notification};
+                    let note = Notification::Harness(HarnessNotification::ProtocolError {
+                        message: format!(
+                            "apply_ai_action panic at turn {turn}: {msg}"
+                        ),
+                    });
+                    write_jsonl(&mut transcript, &note);
+                    outcome = DriveOutcome::EndTurnPanic { turn, message: msg };
+                    summaries.push(summary);
+                    break 'game;
+                }
+            };
+            // Any dispatched tactical action counts as activity for the
+            // stuck-detector — unit-verb actions return `Ok(vec![])`
+            // synchronously (events batch to EndTurn), so checking
+            // `!events.is_empty()` would always trip Stuck after the
+            // first 10 quiet turns even when Claude is moving 40+ units.
+            took_real_action = true;
+            match result {
+                Ok(events) => {
+                    summary.claude_decisions.push(ClaudeTacticalDecision {
+                        turn,
+                        signature: signature.clone(),
+                        events: events.clone(),
+                    });
+                }
+                Err(_err) => {
+                    // Per-action errors (UnknownUnit, IllegalAction) are
+                    // tolerated — the production `drive_ai_slot` has the
+                    // same posture. Still log the attempt for the recap.
+                    summary.claude_decisions.push(ClaudeTacticalDecision {
+                        turn,
+                        signature: signature.clone(),
+                        events: Vec::new(),
+                    });
+                }
+            }
+        }
+
+        // ── End-of-turn: route through the normal apply_action(EndTurn)
+        // path so slots 1+2 run via `drive_ai_slot` unmodified.
+        let act_req_id = next_req_id;
+        next_req_id += 1;
+        let act_req = Request::Act {
+            id: Some(act_req_id),
+            action: PlayerAction::EndTurn,
+        };
+        write_jsonl(&mut transcript, &act_req);
+        let dispatch = std::panic::catch_unwind(AssertUnwindSafe(|| {
+            apply_action(&mut state, 0, &PlayerAction::EndTurn)
+        }));
+        let result = match dispatch {
+            Ok(r) => r,
+            Err(payload) => {
+                let msg = panic_payload_to_string(&payload);
+                eprintln!("[panic] strong-claude EndTurn at turn {turn}: {msg}");
+                use mc_player_api::wire::{HarnessNotification, Notification};
+                let note = Notification::Harness(HarnessNotification::ProtocolError {
+                    message: format!("EndTurn panic at turn {turn}: {msg}"),
+                });
+                write_jsonl(&mut transcript, &note);
+                outcome = DriveOutcome::EndTurnPanic { turn, message: msg };
+                summaries.push(summary);
+                break 'game;
+            }
+        };
+        let post_view = project_view(&state, 0, false);
+        match &result {
+            Ok(events) => {
+                let resp = Response::Ok {
+                    id: Some(act_req_id),
+                    ok: OkMarker,
+                    events: events.clone(),
+                    view: post_view.clone(),
+                };
+                write_jsonl(&mut transcript, &resp);
+                summary.endturn_events = events.clone();
+                for ev in events {
+                    if let Event::AiTurnCompleted { player, actions_applied } = ev {
+                        summary.ai_actions_applied.push((*player, *actions_applied));
+                    }
+                }
+            }
+            Err(err) => {
+                use mc_player_api::wire::ErrMarker;
+                let resp = Response::Err {
+                    id: Some(act_req_id),
+                    ok: ErrMarker,
+                    error: err.clone(),
+                };
+                write_jsonl(&mut transcript, &resp);
+            }
+        }
+
+        // Score snapshot post-EndTurn.
+        for (p_idx, p) in state.players.iter().enumerate() {
+            summary.score_snapshot.push((
+                p_idx as u8,
+                p.gold,
+                p.cities.len() as u32,
+                p.units.len() as u32,
+            ));
+        }
+
+        if took_real_action {
+            consecutive_endturn_only = 0;
+        } else {
+            consecutive_endturn_only += 1;
+        }
+        let is_stuck = consecutive_endturn_only >= STUCK_TURN_THRESHOLD;
+        let game_over = summary
+            .endturn_events
+            .iter()
+            .any(|e| matches!(e, Event::GameOver { .. }));
+        summaries.push(summary);
+        if is_stuck {
+            outcome = DriveOutcome::Stuck(turn);
+            break 'game;
+        }
+        if game_over {
+            outcome = DriveOutcome::NaturalGameOver(turn);
+            break 'game;
+        }
+    }
+    (summaries, outcome)
+}
+
+/// Strong-Claude artifact dir.
+fn strong_claude_dir() -> PathBuf {
+    let crate_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+    let repo_root = crate_dir
+        .parent()
+        .and_then(|p| p.parent())
+        .and_then(|p| p.parent())
+        .and_then(|p| p.parent())
+        .expect("repo root resolves")
+        .to_path_buf();
+    repo_root.join(".local/demo-runs/2026-05-12-claude-mcts-vs-easy-ai")
+}
+
+/// Recap for the strong-Claude run. Mirrors `write_long_recap` shape so
+/// the two artifacts are diff-able side-by-side.
+fn write_strong_claude_recap(
+    out_dir: &Path,
+    summaries: &[StrongTurnSummary],
+    outcome: &DriveOutcome,
+    horizon: u32,
+) {
+    use std::collections::BTreeMap;
+    let path = out_dir.join("recap.md");
+    let mut md = String::new();
+    md.push_str("# Claude-as-Strong-AI Transcript — 2026-05-12\n\n");
+    md.push_str(
+        "**Source**: \
+         `mc-player-api/tests/full_game_transcript.rs::claude_mcts_vs_two_easy_ais_transcript`\n\n",
+    );
+    md.push_str("**Construction**:\n\n");
+    md.push_str(
+        "- Slot 0 (Claude): blackhammer `ScoringWeights` from \
+         `public/games/age-of-dwarves/data/ai_personalities.json`, \
+         running `mc_ai::tactical::run_ai_turn` (the same tactical \
+         heuristic the AI slots use).\n",
+    );
+    md.push_str(
+        "- Slots 1 + 2 (AIs): `ScoringWeights::default()` baseline, \
+         driven by the production `apply_end_turn` → `drive_ai_slot` \
+         path unchanged.\n\n",
+    );
+    md.push_str(
+        "**Why not real MCTS?** `mc_ai::tactical::run_ai_turn` is the \
+         deterministic heuristic pipeline (movement → combat_predict → \
+         settle → production → citizen), not MCTS. The MCTS code in \
+         `mc-ai/src/mcts.rs` is not on the AI-slot turn path in the \
+         current dispatch wiring — the task brief misnamed the function. \
+         Documented in module comment block for `claude_genius_weights`.\n\n",
+    );
+    md.push_str(&format!("**Horizon**: {} turns (ceiling)\n\n", horizon));
+    md.push_str(&format!("**Turns played**: {}\n\n", summaries.len()));
+    md.push_str(&format!("**Termination**: {:?}\n\n", outcome));
+
+    // Victory outcome up front — this is the headline answer.
+    md.push_str("## Victory outcome\n\n");
+    let game_over_event: Option<&Event> = summaries
+        .iter()
+        .flat_map(|s| s.endturn_events.iter())
+        .find(|e| matches!(e, Event::GameOver { .. }));
+    match game_over_event {
+        Some(Event::GameOver { winner, victory_type }) => {
+            let claude_won = *winner == 0;
+            md.push_str(&format!(
+                "- `Event::GameOver` fired: winner=**{}**, victory_type=**{}**\n",
+                winner, victory_type
+            ));
+            md.push_str(&format!(
+                "- Claude (slot 0) result: **{}**\n\n",
+                if claude_won { "WIN" } else { "LOSS" }
+            ));
+        }
+        _ => {
+            md.push_str(&format!(
+                "- No `Event::GameOver` fired. Final turn = {}, outcome = `{:?}`.\n\n",
+                summaries.last().map(|s| s.turn).unwrap_or(0),
+                outcome
+            ));
+        }
+    }
+
+    // Final-score table — direct head-to-head.
+    md.push_str("## Final scores (all slots)\n\n");
+    md.push_str("| slot | gold | cities | units |\n");
+    md.push_str("|------|------|--------|-------|\n");
+    if let Some(last) = summaries.last() {
+        for (slot, gold, cities, units) in &last.score_snapshot {
+            let label = if *slot == 0 { "0 (Claude/blackhammer)" } else { "AI (default weights)" };
+            md.push_str(&format!(
+                "| {} {} | {} | {} | {} |\n",
+                slot, label, gold, cities, units
+            ));
+        }
+    }
+    md.push_str("\n");
+
+    // Per-25-turn checkpoints, slot 0.
+    md.push_str("## Per-25-turn checkpoints (slot 0 = Claude)\n\n");
+    md.push_str("| turn | gold | cities | units |\n");
+    md.push_str("|------|------|--------|-------|\n");
+    let checkpoint_turns: Vec<u32> = (0..=horizon).step_by(25).collect();
+    for ct in &checkpoint_turns {
+        if let Some(summary) = summaries.iter().find(|s| s.turn == *ct) {
+            if let Some((_, gold, cities, units)) =
+                summary.score_snapshot.iter().find(|(slot, _, _, _)| *slot == 0)
+            {
+                md.push_str(&format!("| {} | {} | {} | {} |\n", ct, gold, cities, units));
+            }
+        }
+    }
+    md.push_str("\n");
+
+    // Aggregate counts across the whole run.
+    let mut techs = 0u32;
+    let mut buildings_done = 0u32;
+    let mut units_killed = 0u32;
+    let mut cities_founded = 0u32;
+    let mut combat_resolved = 0u32;
+    for s in summaries {
+        for e in s.endturn_events.iter().chain(
+            s.claude_decisions.iter().flat_map(|d| d.events.iter()),
+        ) {
+            match e {
+                Event::TechResearched { .. } => techs += 1,
+                Event::CityBuildingCompleted { .. } => buildings_done += 1,
+                Event::UnitDestroyed { .. } => units_killed += 1,
+                Event::CityFounded { .. } => cities_founded += 1,
+                Event::CombatResolved { .. } => combat_resolved += 1,
+                _ => {}
+            }
+        }
+    }
+    md.push_str("## Total counts over the full run\n\n");
+    md.push_str(&format!("- Techs researched: {}\n", techs));
+    md.push_str(&format!("- Buildings completed: {}\n", buildings_done));
+    md.push_str(&format!("- Combat resolutions: {}\n", combat_resolved));
+    md.push_str(&format!("- Units killed: {}\n", units_killed));
+    md.push_str(&format!("- Cities founded: {}\n\n", cities_founded));
+
+    // Action-signature frequency for Claude's tactical chain.
+    md.push_str("## Claude action-signature frequency\n\n");
+    let mut freq: BTreeMap<String, u32> = BTreeMap::new();
+    for s in summaries {
+        for d in &s.claude_decisions {
+            let head = d.signature.split(':').next().unwrap_or(&d.signature).to_string();
+            *freq.entry(head).or_insert(0) += 1;
+        }
+    }
+    md.push_str("| action | count |\n|--------|-------|\n");
+    for (k, v) in &freq {
+        md.push_str(&format!("| `{}` | {} |\n", k, v));
+    }
+    md.push_str("\n");
+
+    fs::write(&path, md).expect("write strong-claude recap");
+}
+
+/// 500-turn Claude-as-strong-AI test. `#[ignore]`d like
+/// `long_game_transcript`; run via:
+///
+/// ```sh
+/// cargo test -p mc-player-api --test full_game_transcript -- \
+///     --ignored claude_mcts_vs_two_easy_ais_transcript --nocapture
+/// ```
+///
+/// Note the test name preserves the original task brief's wording
+/// (`claude_mcts_vs_two_easy_ais`) even though the implementation runs
+/// the tactical heuristic rather than MCTS — the rename would break
+/// the surface the brief asked for. The recap and module comment make
+/// the actual semantics explicit.
+#[test]
+#[ignore = "500-turn strong-Claude run; invoke via --ignored"]
+fn claude_mcts_vs_two_easy_ais_transcript() {
+    const STRONG_HORIZON: u32 = 500;
+    let out_dir = strong_claude_dir();
+    let _ = fs::remove_dir_all(&out_dir);
+    let (summaries, outcome) = drive_strong_claude_game(&out_dir, STRONG_HORIZON);
+    write_strong_claude_recap(&out_dir, &summaries, &outcome, STRONG_HORIZON);
+
+    // Sanity checks.
+    assert!(
+        !summaries.is_empty(),
+        "strong-claude run produced zero turns"
+    );
+    let transcript_path = out_dir.join("transcript.jsonl");
+    assert!(
+        transcript_path.exists(),
+        "transcript missing at {}",
+        transcript_path.display()
+    );
+
+    // Surface the headline result on the test log.
+    let game_over = summaries
+        .iter()
+        .flat_map(|s| s.endturn_events.iter())
+        .find_map(|e| match e {
+            Event::GameOver { winner, victory_type } => {
+                Some((*winner, victory_type.clone()))
+            }
+            _ => None,
+        });
+    eprintln!(
+        "[strong-claude] turns={} outcome={:?} game_over={:?} artifact={}",
+        summaries.len(),
+        outcome,
+        game_over,
+        out_dir.display()
+    );
+}