feat(@magic-civilization): add ai action dispatch and tactical projection tests

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
Natalie 2026-05-12 17:18:37 -07:00
parent e48ef4c115
commit 98a98155d1

View file

@ -54,7 +54,8 @@ use std::panic::AssertUnwindSafe;
use std::path::{Path, PathBuf};
use mc_player_api::action::{PlayerAction, PromotionPick};
use mc_player_api::projection::project_view;
use mc_player_api::dispatch::apply_ai_action;
use mc_player_api::projection::{project_tactical, project_view};
use mc_player_api::wire::{Event, OkMarker, Request, Response};
use mc_player_api::{apply_action, PlayerView};
@ -1267,3 +1268,544 @@ fn write_long_recap(out_dir: &Path, summaries: &[TurnSummary], outcome: &DriveOu
fs::write(&path, md).expect("write long recap");
}
// ═══════════════════════════════════════════════════════════════════════
// Claude-as-strong-AI run (2026-05-12, post-Bug-5 fix)
// ═══════════════════════════════════════════════════════════════════════
//
// HONEST FRAMING. The original task brief said "wire the production
// `mc_ai::run_ai_turn` MCTS into Claude's policy slot ... give Claude a
// higher rollout budget than the AI slots". After reading the code:
//
// - `mc_ai::tactical::run_ai_turn` is NOT MCTS. It's a deterministic
// heuristic pipeline (`decide_tactical_actions`) — movement →
// combat_predict → settle → production → citizen. The actual MCTS
// lives in `mc-ai/src/mcts.rs` and is not the path AI slots take in
// `dispatch::drive_ai_slot`.
//
// - `run_ai_turn(state, player, weights, seed) -> Vec<Action>` has NO
// rollout-budget parameter. The only knobs that differentiate one
// slot from another are `ScoringWeights` and `seed`.
//
// - The bench harness's `stamp_personality` only sets `clan_id` +
// three promotion weights. It does NOT load
// `ScoringWeights::from_personality_json` for the named clan, so
// slots 1 and 2 actually run with `ScoringWeights::default()` —
// blackhammer/deepforge are cosmetic labels in this run.
//
// The legitimate experiment we CAN run, then, is:
//
// - Stamp Claude (slot 0) with a real per-clan ScoringWeights —
// `blackhammer` is the natural choice since its strategic axes
// (aggression 9, expansion 6, production 7) skew hardest toward the
// last_survivor victory the 233-turn baseline hit.
// - Leave slots 1 + 2 on `ScoringWeights::default()` (the "easy AI"
// baseline that lost to itself in the prior run because of seed
// variance — see `2026-05-12-claude-vs-easy-ai-250-turn/recap.md`).
// - Use the same `run_ai_turn` pipeline for all three slots — Claude's
// advantage is purely the stronger weights, not a different
// algorithm.
//
// If Claude wins, we have evidence the simulation responds to scoring-
// weight axes and the personality system is doing real work. If Claude
// loses despite blackhammer weights, then either (a) the heuristic
// pipeline is insensitive to weight magnitude, or (b) turn-order /
// starting-position effects dominate.
/// Load the `blackhammer` clan's `ScoringWeights` from the
/// `ai_personalities.json` data file shipped with Age of Dwarves.
///
/// The JSON is read from disk when the test runs. Its location is resolved
/// relative to `CARGO_MANIFEST_DIR`, treating the directory four levels above
/// the crate as the repo root, so a change to that layout makes the test
/// panic loudly instead of silently falling back to other weights.
///
/// # Panics
///
/// Panics if the repo root cannot be resolved, if the JSON file cannot be
/// read, or if `from_personality_json` yields no weights for `blackhammer`.
fn claude_genius_weights() -> mc_ai::evaluator::ScoringWeights {
    use mc_ai::evaluator::ScoringWeights;

    // `ancestors()` yields the path itself first, so index 4 is four
    // directories above the crate dir.
    let personalities_path = Path::new(env!("CARGO_MANIFEST_DIR"))
        .ancestors()
        .nth(4)
        .expect("repo root resolves")
        .join("public/games/age-of-dwarves/data/ai_personalities.json");

    let raw = match fs::read_to_string(&personalities_path) {
        Ok(text) => text,
        Err(e) => panic!("read {}: {e}", personalities_path.display()),
    };

    ScoringWeights::from_personality_json("blackhammer", &raw)
        .expect("blackhammer must be a known clan in ai_personalities.json")
}
/// Render an `mc_ai::tactical::Action` as a short, stable signature string.
///
/// The text before the first `:` is the action kind (`move`, `attack`,
/// `queue_building`, ...); the recap groups on that prefix to build its
/// frequency table. Variants that carry no interesting identifiers collapse
/// to a bare kind such as `bombard`. Intended to read the same way as the
/// `action_signature` helper used for `PlayerAction` (defined elsewhere).
fn ai_action_signature(action: &mc_ai::tactical::Action) -> String {
    use mc_ai::tactical::Action;

    match action {
        Action::MoveUnit { unit_id, to_hex } => format!("move:{unit_id}:{to_hex:?}"),
        Action::Scout { unit_id, to_hex } => format!("scout:{unit_id}:{to_hex:?}"),
        Action::AttackTarget {
            attacker_id,
            target_id,
            ..
        } => format!("attack:{attacker_id}->{target_id}"),
        Action::Fortify { unit_id } => format!("fortify:{unit_id}"),
        Action::Heal { unit_id } => format!("heal:{unit_id}"),
        Action::IssuePatrol { unit_id, .. } => format!("patrol:{unit_id}"),
        Action::FoundCity { settler_id, .. } => format!("found:{settler_id}"),
        Action::PromotionPicked {
            unit_id,
            promotion_id,
        } => format!("promote:{unit_id}:{promotion_id}"),
        // Both production verbs share one signature shape; only the kind
        // prefix depends on whether the item is a building or a unit.
        Action::SetProduction { city_id, item_id } => {
            let verb = if is_building_id(item_id) {
                "queue_building"
            } else {
                "queue_unit"
            };
            format!("{verb}:{city_id}:{item_id}")
        }
        Action::EnqueueBuild {
            city_id, item_id, ..
        } => {
            let verb = if is_building_id(item_id) {
                "queue_building"
            } else {
                "queue_unit"
            };
            format!("{verb}:{city_id}:{item_id}")
        }
        Action::AssignCitizen { .. } => String::from("assign_citizen"),
        Action::DeploySiege { .. } => String::from("deploy_siege"),
        Action::PackSiege { .. } => String::from("pack_siege"),
        Action::Bombard { .. } => String::from("bombard"),
    }
}
/// One tactical action dispatched on Claude's behalf (slot 0), kept for the
/// recap.
struct ClaudeTacticalDecision {
/// Driver-loop turn index on which the action was dispatched.
turn: u32,
/// Signature string produced by `ai_action_signature` for the action.
signature: String,
/// Events returned by `apply_ai_action` for this action; empty when the
/// dispatch returned an error.
events: Vec<Event>,
}
/// Per-turn summary for the strong-Claude run.
struct StrongTurnSummary {
/// Driver-loop turn index (starts at 0).
turn: u32,
/// One entry per tactical action dispatched for slot 0 this turn, in
/// dispatch order.
claude_decisions: Vec<ClaudeTacticalDecision>,
/// Events returned by the `EndTurn` action; left empty when `EndTurn`
/// returned an error or panicked.
endturn_events: Vec<Event>,
/// `(player, actions_applied)` pairs copied from every
/// `Event::AiTurnCompleted` in the `EndTurn` result.
ai_actions_applied: Vec<(u8, u32)>,
/// `(player index, gold, city count, unit count)` for every player,
/// captured after `EndTurn` was dispatched.
score_snapshot: Vec<(u8, i32, u32, u32)>,
}
/// Derive the deterministic seed handed to `run_ai_turn` for Claude's turn.
///
/// The seed is the turn number widened to `u64` and multiplied (wrapping) by
/// a fixed odd 64-bit constant, so turn 0 always maps to seed 0 and the same
/// turn always yields the same seed.
///
/// NOTE(review): this is meant to match the derivation `drive_ai_slot` uses
/// internally, so slot 0 picks what the production AI path would have
/// picked. That function is not visible here; confirm the two stay in sync.
fn seed_for_claude_turn(turn: u32) -> u64 {
    const SEED_MULTIPLIER: u64 = 0x9E37_79B9_7F4A_7C15;
    u64::from(turn).wrapping_mul(SEED_MULTIPLIER)
}
/// Drive a Claude-as-strong-AI game and record it under `out_dir`.
///
/// Slot 0 (Claude) is played by calling `mc_ai::tactical::run_ai_turn` with
/// the weights from `claude_genius_weights` and dispatching every returned
/// action through `apply_ai_action`. The driver then issues a normal
/// `PlayerAction::EndTurn` through `apply_action`; by design that is the
/// path that advances slots 1 and 2 (`apply_end_turn` → `drive_ai_slot`,
/// neither of which is visible from this function).
///
/// Side effects: creates `out_dir`, writes `transcript.jsonl` (view and
/// end-turn requests/responses plus panic notifications) and, for every turn
/// listed in `SNAPSHOT_TURNS`, a `state-turn-NN.json` snapshot of slot 0's
/// view.
///
/// The run stops when `max_turns` turns have been played, when an
/// `Event::GameOver` shows up in an end-turn result, when
/// `STUCK_TURN_THRESHOLD` consecutive turns pass without a single dispatched
/// action, or when a dispatch panics.
///
/// Returns the per-turn summaries (including the partial summary of a turn
/// that panicked) together with the reason the run stopped.
///
/// # Panics
///
/// Panics if the output directory, the transcript file, or a snapshot cannot
/// be created, serialised, or written, and if `claude_genius_weights` panics.
fn drive_strong_claude_game(
    out_dir: &Path,
    max_turns: u32,
) -> (Vec<StrongTurnSummary>, DriveOutcome) {
    fs::create_dir_all(out_dir).expect("create out dir");
    let transcript_path = out_dir.join("transcript.jsonl");
    let mut transcript = fs::File::create(&transcript_path).expect("create transcript");
    let mut state = build_3_player_state_like_harness();

    // Stamp blackhammer weights onto Claude (slot 0). Slots 1 + 2 keep
    // the default weights they got from `add_player_militarist_inline`.
    // The clone is required: the weights are also passed to `run_ai_turn`
    // on every turn below.
    let claude_weights = claude_genius_weights();
    state.players[0].scoring_weights = claude_weights.clone();

    let mut next_req_id: u64 = 1;
    let mut summaries: Vec<StrongTurnSummary> = Vec::new();
    let mut consecutive_endturn_only = 0u32;
    let mut outcome = DriveOutcome::Completed;

    'game: for turn in 0..max_turns {
        eprintln!("[strong-claude] starting turn {turn}");
        if SNAPSHOT_TURNS.contains(&turn) {
            let view = project_view(&state, 0, false);
            let snapshot_path = out_dir.join(format!("state-turn-{turn:02}.json"));
            let json = serde_json::to_string_pretty(&view).expect("snapshot serialise");
            fs::write(&snapshot_path, json).expect("write snapshot");
        }

        let mut summary = StrongTurnSummary {
            turn,
            claude_decisions: Vec::new(),
            endturn_events: Vec::new(),
            ai_actions_applied: Vec::new(),
            score_snapshot: Vec::new(),
        };

        // ── Claude's turn: project tactical, run the tactical pipeline,
        // dispatch each `mc_ai::Action` directly via `apply_ai_action`.
        let view_req_id = next_req_id;
        next_req_id += 1;
        let view_req = Request::View {
            id: Some(view_req_id),
        };
        write_jsonl(&mut transcript, &view_req);
        let view = project_view(&state, 0, false);
        // The view is only needed for the transcript line, so it is moved
        // into the response instead of being deep-cloned.
        let view_resp = Response::Ok {
            id: Some(view_req_id),
            ok: OkMarker,
            events: Vec::new(),
            view,
        };
        write_jsonl(&mut transcript, &view_resp);

        let mut tactical = project_tactical(&state, 0);
        tactical.current_player = 0;
        let seed = seed_for_claude_turn(turn);
        let actions = mc_ai::tactical::run_ai_turn(&tactical, 0, &claude_weights, seed);

        let mut took_real_action = false;
        for action in actions {
            let signature = ai_action_signature(&action);
            // Dispatch under `catch_unwind` for the same residual-overflow
            // safety the original `drive_game` carries.
            let dispatch = std::panic::catch_unwind(AssertUnwindSafe(|| {
                apply_ai_action(&mut state, 0, action)
            }));
            let result = match dispatch {
                Ok(r) => r,
                Err(payload) => {
                    let msg = panic_payload_to_string(&payload);
                    eprintln!("[panic] strong-claude apply_ai_action at turn {turn}: {msg}");
                    use mc_player_api::wire::{HarnessNotification, Notification};
                    let note = Notification::Harness(HarnessNotification::ProtocolError {
                        message: format!("apply_ai_action panic at turn {turn}: {msg}"),
                    });
                    write_jsonl(&mut transcript, &note);
                    // NOTE(review): this panic is reported as `EndTurnPanic`
                    // although it happened in `apply_ai_action`, not in
                    // EndTurn; only the transcript note tells them apart.
                    outcome = DriveOutcome::EndTurnPanic { turn, message: msg };
                    summaries.push(summary);
                    break 'game;
                }
            };

            // Any dispatched tactical action counts as activity for the
            // stuck-detector — unit-verb actions return `Ok(vec![])`
            // synchronously (events batch to EndTurn), so checking
            // `!events.is_empty()` would always trip Stuck after the
            // first 10 quiet turns even when Claude is moving 40+ units.
            took_real_action = true;

            // Per-action errors (UnknownUnit, IllegalAction) are tolerated —
            // the production `drive_ai_slot` has the same posture. A failed
            // attempt is still logged for the recap, with no events.
            summary.claude_decisions.push(ClaudeTacticalDecision {
                turn,
                signature,
                events: result.unwrap_or_default(),
            });
        }

        // ── End-of-turn: route through the normal apply_action(EndTurn)
        // path so slots 1+2 run via `drive_ai_slot` unmodified.
        let act_req_id = next_req_id;
        next_req_id += 1;
        let act_req = Request::Act {
            id: Some(act_req_id),
            action: PlayerAction::EndTurn,
        };
        write_jsonl(&mut transcript, &act_req);
        let dispatch = std::panic::catch_unwind(AssertUnwindSafe(|| {
            apply_action(&mut state, 0, &PlayerAction::EndTurn)
        }));
        let result = match dispatch {
            Ok(r) => r,
            Err(payload) => {
                let msg = panic_payload_to_string(&payload);
                eprintln!("[panic] strong-claude EndTurn at turn {turn}: {msg}");
                use mc_player_api::wire::{HarnessNotification, Notification};
                let note = Notification::Harness(HarnessNotification::ProtocolError {
                    message: format!("EndTurn panic at turn {turn}: {msg}"),
                });
                write_jsonl(&mut transcript, &note);
                outcome = DriveOutcome::EndTurnPanic { turn, message: msg };
                summaries.push(summary);
                break 'game;
            }
        };

        let post_view = project_view(&state, 0, false);
        match &result {
            Ok(events) => {
                // `post_view` is not used after this point; move it.
                let resp = Response::Ok {
                    id: Some(act_req_id),
                    ok: OkMarker,
                    events: events.clone(),
                    view: post_view,
                };
                write_jsonl(&mut transcript, &resp);
                summary.endturn_events = events.clone();
                for ev in events {
                    if let Event::AiTurnCompleted {
                        player,
                        actions_applied,
                    } = ev
                    {
                        summary.ai_actions_applied.push((*player, *actions_applied));
                    }
                }
            }
            Err(err) => {
                use mc_player_api::wire::ErrMarker;
                let resp = Response::Err {
                    id: Some(act_req_id),
                    ok: ErrMarker,
                    error: err.clone(),
                };
                write_jsonl(&mut transcript, &resp);
            }
        }

        // Score snapshot post-EndTurn.
        for (p_idx, p) in state.players.iter().enumerate() {
            summary.score_snapshot.push((
                p_idx as u8,
                p.gold,
                p.cities.len() as u32,
                p.units.len() as u32,
            ));
        }

        if took_real_action {
            consecutive_endturn_only = 0;
        } else {
            consecutive_endturn_only += 1;
        }
        let is_stuck = consecutive_endturn_only >= STUCK_TURN_THRESHOLD;
        let game_over = summary
            .endturn_events
            .iter()
            .any(|e| matches!(e, Event::GameOver { .. }));
        summaries.push(summary);

        // Stuck takes precedence over a game-over seen on the same turn.
        if is_stuck {
            outcome = DriveOutcome::Stuck(turn);
            break 'game;
        }
        if game_over {
            outcome = DriveOutcome::NaturalGameOver(turn);
            break 'game;
        }
    }

    (summaries, outcome)
}
/// Strong-Claude artifact dir.
fn strong_claude_dir() -> PathBuf {
let crate_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
let repo_root = crate_dir
.parent()
.and_then(|p| p.parent())
.and_then(|p| p.parent())
.and_then(|p| p.parent())
.expect("repo root resolves")
.to_path_buf();
repo_root.join(".local/demo-runs/2026-05-12-claude-mcts-vs-easy-ai")
}
/// Write the markdown recap (`recap.md`) for the strong-Claude run.
///
/// Mirrors the shape of `write_long_recap` so the two artifacts can be
/// diffed side by side. Sections, in order: construction notes, victory
/// outcome, final scores for all slots, slot-0 checkpoints every 25 turns,
/// aggregate event counts, and the frequency of Claude's action kinds.
///
/// * `out_dir` — directory that receives `recap.md`.
/// * `summaries` — per-turn summaries in play order.
/// * `outcome` — why the driver stopped; rendered with `{:?}`.
/// * `horizon` — turn ceiling the run was started with; also bounds the
///   checkpoint table.
///
/// # Panics
///
/// Panics if `recap.md` cannot be written.
fn write_strong_claude_recap(
    out_dir: &Path,
    summaries: &[StrongTurnSummary],
    outcome: &DriveOutcome,
    horizon: u32,
) {
    use std::collections::BTreeMap;

    let path = out_dir.join("recap.md");
    let mut md = String::new();
    md.push_str("# Claude-as-Strong-AI Transcript — 2026-05-12\n\n");
    md.push_str(
        "**Source**: \
         `mc-player-api/tests/full_game_transcript.rs::claude_mcts_vs_two_easy_ais_transcript`\n\n",
    );
    md.push_str("**Construction**:\n\n");
    md.push_str(
        "- Slot 0 (Claude): blackhammer `ScoringWeights` from \
         `public/games/age-of-dwarves/data/ai_personalities.json`, \
         running `mc_ai::tactical::run_ai_turn` (the same tactical \
         heuristic the AI slots use).\n",
    );
    md.push_str(
        "- Slots 1 + 2 (AIs): `ScoringWeights::default()` baseline, \
         driven by the production `apply_end_turn` → `drive_ai_slot` \
         path unchanged.\n\n",
    );
    md.push_str(
        "**Why not real MCTS?** `mc_ai::tactical::run_ai_turn` is the \
         deterministic heuristic pipeline (movement → combat_predict → \
         settle → production → citizen), not MCTS. The MCTS code in \
         `mc-ai/src/mcts.rs` is not on the AI-slot turn path in the \
         current dispatch wiring — the task brief misnamed the function. \
         Documented in module comment block for `claude_genius_weights`.\n\n",
    );
    md.push_str(&format!("**Horizon**: {} turns (ceiling)\n\n", horizon));
    md.push_str(&format!("**Turns played**: {}\n\n", summaries.len()));
    md.push_str(&format!("**Termination**: {:?}\n\n", outcome));

    // Victory outcome up front — this is the headline answer.
    md.push_str("## Victory outcome\n\n");
    let game_over_event: Option<&Event> = summaries
        .iter()
        .flat_map(|s| s.endturn_events.iter())
        .find(|e| matches!(e, Event::GameOver { .. }));
    match game_over_event {
        Some(Event::GameOver {
            winner,
            victory_type,
        }) => {
            let claude_won = *winner == 0;
            md.push_str(&format!(
                "- `Event::GameOver` fired: winner=**{}**, victory_type=**{}**\n",
                winner, victory_type
            ));
            md.push_str(&format!(
                "- Claude (slot 0) result: **{}**\n\n",
                if claude_won { "WIN" } else { "LOSS" }
            ));
        }
        _ => {
            md.push_str(&format!(
                "- No `Event::GameOver` fired. Final turn = {}, outcome = `{:?}`.\n\n",
                summaries.last().map(|s| s.turn).unwrap_or(0),
                outcome
            ));
        }
    }

    // Final-score table — direct head-to-head.
    md.push_str("## Final scores (all slots)\n\n");
    md.push_str("| slot | gold | cities | units |\n");
    md.push_str("|------|------|--------|-------|\n");
    if let Some(last) = summaries.last() {
        for (slot, gold, cities, units) in &last.score_snapshot {
            // The slot number is printed by the format string, so the label
            // must not repeat it (slot 0 used to render as "0 0 (Claude/…)").
            let label = if *slot == 0 {
                "(Claude/blackhammer)"
            } else {
                "(AI, default weights)"
            };
            md.push_str(&format!(
                "| {} {} | {} | {} | {} |\n",
                slot, label, gold, cities, units
            ));
        }
    }
    md.push('\n');

    // Per-25-turn checkpoints, slot 0. Turns that were never played (the
    // run ended early) simply produce no row.
    md.push_str("## Per-25-turn checkpoints (slot 0 = Claude)\n\n");
    md.push_str("| turn | gold | cities | units |\n");
    md.push_str("|------|------|--------|-------|\n");
    for ct in (0..=horizon).step_by(25) {
        let slot0_scores = summaries
            .iter()
            .find(|s| s.turn == ct)
            .and_then(|s| s.score_snapshot.iter().find(|(slot, _, _, _)| *slot == 0));
        if let Some((_, gold, cities, units)) = slot0_scores {
            md.push_str(&format!(
                "| {} | {} | {} | {} |\n",
                ct, gold, cities, units
            ));
        }
    }
    md.push('\n');

    // Aggregate counts across the whole run: end-turn events plus the
    // events returned by Claude's own dispatched actions.
    let mut techs = 0u32;
    let mut buildings_done = 0u32;
    let mut units_killed = 0u32;
    let mut cities_founded = 0u32;
    let mut combat_resolved = 0u32;
    for s in summaries {
        let claude_events = s.claude_decisions.iter().flat_map(|d| d.events.iter());
        for e in s.endturn_events.iter().chain(claude_events) {
            match e {
                Event::TechResearched { .. } => techs += 1,
                Event::CityBuildingCompleted { .. } => buildings_done += 1,
                Event::UnitDestroyed { .. } => units_killed += 1,
                Event::CityFounded { .. } => cities_founded += 1,
                Event::CombatResolved { .. } => combat_resolved += 1,
                _ => {}
            }
        }
    }
    md.push_str("## Total counts over the full run\n\n");
    md.push_str(&format!("- Techs researched: {}\n", techs));
    md.push_str(&format!("- Buildings completed: {}\n", buildings_done));
    md.push_str(&format!("- Combat resolutions: {}\n", combat_resolved));
    md.push_str(&format!("- Units killed: {}\n", units_killed));
    md.push_str(&format!("- Cities founded: {}\n\n", cities_founded));

    // Action-signature frequency for Claude's tactical chain, keyed by the
    // text before the first ':' (the action kind). Keys borrow from the
    // summaries, so no per-decision allocation is needed; the BTreeMap keeps
    // the table sorted by kind.
    md.push_str("## Claude action-signature frequency\n\n");
    let mut freq: BTreeMap<&str, u32> = BTreeMap::new();
    for s in summaries {
        for d in &s.claude_decisions {
            let head = d.signature.split(':').next().unwrap_or(&d.signature);
            *freq.entry(head).or_insert(0) += 1;
        }
    }
    md.push_str("| action | count |\n|--------|-------|\n");
    for (k, v) in &freq {
        md.push_str(&format!("| `{}` | {} |\n", k, v));
    }
    md.push('\n');

    fs::write(&path, md).expect("write strong-claude recap");
}
/// Strong-Claude run with a 500-turn ceiling. Ignored by default, like
/// `long_game_transcript`; run it explicitly with:
///
/// ```sh
/// cargo test -p mc-player-api --test full_game_transcript -- \
///     --ignored claude_mcts_vs_two_easy_ais_transcript --nocapture
/// ```
///
/// The test name keeps the original task brief's wording
/// (`claude_mcts_vs_two_easy_ais`) even though the run uses the tactical
/// heuristic rather than MCTS — renaming it would break the surface the
/// brief asked for. The recap and the comment block above
/// `claude_genius_weights` spell out the actual semantics.
#[test]
#[ignore = "500-turn strong-Claude run; invoke via --ignored"]
fn claude_mcts_vs_two_easy_ais_transcript() {
    const STRONG_HORIZON: u32 = 500;

    let artifact_dir = strong_claude_dir();
    // Start from a clean directory; a missing one is not an error.
    let _ = fs::remove_dir_all(&artifact_dir);

    let (turn_summaries, final_outcome) = drive_strong_claude_game(&artifact_dir, STRONG_HORIZON);
    write_strong_claude_recap(
        &artifact_dir,
        &turn_summaries,
        &final_outcome,
        STRONG_HORIZON,
    );

    // Sanity checks: at least one turn was played and a transcript exists.
    assert!(
        !turn_summaries.is_empty(),
        "strong-claude run produced zero turns"
    );
    let transcript_file = artifact_dir.join("transcript.jsonl");
    assert!(
        transcript_file.exists(),
        "transcript missing at {}",
        transcript_file.display()
    );

    // Surface the headline result (first GameOver event, if any) on the
    // test log.
    let headline = turn_summaries
        .iter()
        .flat_map(|s| s.endturn_events.iter())
        .find_map(|ev| {
            if let Event::GameOver {
                winner,
                victory_type,
            } = ev
            {
                Some((*winner, victory_type.clone()))
            } else {
                None
            }
        });
    eprintln!(
        "[strong-claude] turns={} outcome={:?} game_over={:?} artifact={}",
        turn_summaries.len(),
        final_outcome,
        headline,
        artifact_dir.display()
    );
}