feat(@projects/@magic-civilization): ✨ expand claude action priority branches
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
92cd9f547d
commit
c7833f6dfa
2 changed files with 533 additions and 30 deletions
|
|
@ -1146,3 +1146,90 @@ with score / AI-action / event tables.
|
|||
in `recap.md`.
|
||||
- **Phase 13 screenshots STILL gated on p2-72**. This is the
|
||||
API+transcript form of Phase 5; no rendered proof scene captured.
|
||||
|
||||
## Analysis — policy extension (2026-05-12)
|
||||
|
||||
Extension to `tests/full_game_transcript.rs::pick_claude_action`: all
|
||||
four additional priority branches wired (Promote, ResearchTech, Attack,
|
||||
QueueProduction-building, plus existing Move/Fortify/EndTurn).
|
||||
|
||||
### Priority order (post-extension)
|
||||
|
||||
1. `FoundCity` (founder ready)
|
||||
2. `Promote` from `view.pending_events.promotion_picks` — inert today
|
||||
(projector hard-codes `PendingEventsView::default()`)
|
||||
3. `ResearchTech` from `view.legal_actions` — inert today (projector
|
||||
does not enumerate `ResearchTech`; no `TechWeb` handle on `GameState`)
|
||||
4. `Attack` — lowest-HP visible defender (resolved via `view.units`
|
||||
lookup at target hex)
|
||||
5. `QueueProduction` (building) — preference order
|
||||
library → school → granary → farm → aqueduct → forge → mine →
|
||||
workshop → market → bank → first legal building
|
||||
6. `QueueProduction` (`dwarf_warrior` then any other unit)
|
||||
7. `Move`
|
||||
8. `Fortify`
|
||||
9. `EndTurn`
|
||||
|
||||
Building MUST precede unit production because the bench `CityState`
|
||||
queue is single-slot — once a warrior queues, no further
|
||||
`QueueProduction` surfaces until completion.
|
||||
|
||||
### Projector gaps acknowledged (NOT fixed in this pass)
|
||||
|
||||
- `ResearchTech` not in `view.legal_actions`. Tracked by
|
||||
`p2-67-followup-legal-actions`.
|
||||
- `Promote` not in `view.units[*].legal_actions`. Sources from
|
||||
`view.pending_events.promotion_picks`, hard-coded to default by
|
||||
projector. Tracked by `p2-67-followup-legal-actions`.
|
||||
- `Attack` DOES surface from `view.units[*].legal_actions` for
|
||||
enemy-occupied adjacent hexes; verified by 250-turn long-run output
|
||||
(5 `Attack` actions dispatched).
|
||||
|
||||
### 25-turn determinism test
|
||||
|
||||
`claude_vs_ai_full_game_transcript` extended with a new assertion:
|
||||
`queue_building:` signature must fire at least once over the 25-turn
|
||||
horizon. Byte-identical determinism across two runs preserved.
|
||||
|
||||
### 250-turn long-run
|
||||
|
||||
New test `long_game_transcript` (`#[ignore]`), run via
|
||||
`cargo test --release -p mc-player-api --test full_game_transcript -- --ignored long_game_transcript --nocapture`.
|
||||
Artifact dir: `.local/demo-runs/2026-05-12-claude-vs-easy-ai-250-turn/`.
|
||||
|
||||
**Outcome of one run (local macOS, release)**: ran the full 250-turn
|
||||
horizon, no PvP overflow panic (`DriveOutcome::Completed`). No
|
||||
`Event::GameOver` fired. Final scores:
|
||||
|
||||
| slot | gold | cities | units |
|
||||
|------|------|--------|-------|
|
||||
| 0 (Claude) | 9116 | 5 | 44 |
|
||||
| 1 (blackhammer) | 21236 | 20 | 732 |
|
||||
| 2 (deepforge) | 32260 | 25 | 1696 |
|
||||
|
||||
**Action-signature frequency** (Claude only, 250 turns):
|
||||
|
||||
| signature head | count |
|
||||
|----------------|-------|
|
||||
| `found` | 9 |
|
||||
| `attack` | 5 |
|
||||
| `queue_building` | 14 |
|
||||
| `move` | 39 |
|
||||
| `fortify` | 951 |
|
||||
| `end_turn` | 250 |
|
||||
| `research_tech` | 0 (inert — projector gap) |
|
||||
| `promote` | 0 (inert — projector gap) |
|
||||
|
||||
Confirm-by-absence: `research_tech:` and `promote:` did NOT fire, as
|
||||
expected. If they ever DO fire in a future run, projector gaps have
|
||||
closed and the policy is now exercising those branches.
|
||||
|
||||
### Residual notes
|
||||
|
||||
- AI slots vastly outscale Claude on units (732 / 1696 vs 44). The
|
||||
deterministic policy is conservative — building branch loops on
|
||||
`library` and `Fortify` dominates (~93% of non-`end_turn` actions). Real Claude-via-API would
|
||||
diversify; this is a fixture-policy artefact, not an engine bug.
|
||||
- Combat overflow at `mc-turn/src/processor.rs:2425` did NOT reproduce
|
||||
in this 250-turn run. The harness PvP path apparently exercises a
|
||||
different code path than the bench. Tracked under p2-67-followup.
|
||||
|
|
|
|||
|
|
@ -61,10 +61,17 @@ use mc_player_api::{apply_action, PlayerView};
|
|||
mod common;
|
||||
use common::build_3_player_state_like_harness;
|
||||
|
||||
/// Default turn horizon for the standard transcript test. Game can
/// end earlier on natural victory / stuck-detection. The long-game
/// variant (`long_game_transcript`, `#[ignore]`) passes a larger value
/// directly into `drive_game`.
const MAX_TURNS: u32 = 25;

/// Long-horizon turn cap, passed to `drive_game` by
/// `long_game_transcript`. The PvP combat overflow at
/// `mc-turn/src/processor.rs:2425` may terminate the run before this
/// is reached; the cap exists as a hard ceiling.
const LONG_GAME_MAX_TURNS: u32 = 250;
|
||||
|
||||
/// Max actions Claude takes per turn before forced EndTurn — guards
|
||||
/// against runaway policy bugs (the policy is deterministic and should
|
||||
/// converge on EndTurn naturally but a hard cap is cheap).
|
||||
|
|
@ -97,42 +104,101 @@ struct TurnSummary {
|
|||
score_snapshot: Vec<(u8, i32, u32, u32)>,
|
||||
}
|
||||
|
||||
/// Known building ids from the bench `ai_building_catalog` (see
/// `tests/common/mod.rs::build_building_catalog`). Anything not in this
/// set inside a `QueueProduction { item }` is treated as a unit. Fixture-
/// scoped — when the catalog grows, extend this list.
const BENCH_BUILDING_IDS: &[&str] = &["granary", "forge", "library", "walls"];

/// Returns `true` if `item` is a building id under the bench harness
/// catalog, i.e. an exact (case-sensitive) member of
/// [`BENCH_BUILDING_IDS`]. Every other string, including the empty
/// string, is reported as not-a-building.
fn is_building_id(item: &str) -> bool {
    BENCH_BUILDING_IDS.contains(&item)
}
|
||||
|
||||
/// Stable signature of a `PlayerAction` for blacklist tracking. Two
|
||||
/// actions with the same signature are considered "same retry" — used
|
||||
/// to skip dispatched-but-no-op actions on a given turn.
|
||||
///
|
||||
/// `QueueProduction` is split into `queue_building:` and `queue_unit:`
|
||||
/// arms so the determinism test can assert that the building branch of
|
||||
/// `pick_claude_action` actually fires at least once over the 25-turn
|
||||
/// horizon (the single-slot city queue empties between turns, so both
|
||||
/// branches get an opportunity).
|
||||
fn action_signature(a: &PlayerAction) -> String {
|
||||
match a {
|
||||
PlayerAction::FoundCity { unit_id } => format!("found:{unit_id}"),
|
||||
PlayerAction::QueueProduction { city_id, item, .. } => {
|
||||
format!("queue:{city_id}:{item}")
|
||||
if is_building_id(item) {
|
||||
format!("queue_building:{city_id}:{item}")
|
||||
} else {
|
||||
format!("queue_unit:{city_id}:{item}")
|
||||
}
|
||||
}
|
||||
PlayerAction::Move { unit_id, to } => format!("move:{unit_id}:{:?}", to),
|
||||
PlayerAction::Attack { unit_id, target } => {
|
||||
format!("attack:{unit_id}:{:?}", target)
|
||||
}
|
||||
PlayerAction::Fortify { unit_id } => format!("fortify:{unit_id}"),
|
||||
PlayerAction::ResearchTech { tech_id } => format!("research_tech:{tech_id}"),
|
||||
PlayerAction::Promote(pick) => {
|
||||
format!("promote:{}:{}", pick.unit_id, pick.promotion_id)
|
||||
}
|
||||
PlayerAction::EndTurn => "end_turn".into(),
|
||||
other => format!("other:{other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Building-id preference order for the building branch of
/// `pick_claude_action`. Research-flavoured first, then food/growth,
/// then production, then gold. Anything not matched falls through to
/// "first legal building" inside the policy.
///
/// NOTE: the policy additionally requires `is_building_id(item)` for a
/// preferred entry to be picked, so only ids that also appear in
/// `BENCH_BUILDING_IDS` can match today; the remaining entries are
/// placeholders until that catalog list is extended.
const BUILDING_PREFERENCE: &[&str] = &[
    // research-flavoured
    "library", "school",
    // food / growth
    "granary", "farm", "aqueduct",
    // production
    "forge", "mine", "workshop",
    // gold
    "market", "bank",
];
|
||||
|
||||
/// Pick Claude's next action by reading the projector-computed
|
||||
/// `legal_actions` lists directly. Priority order (p2-67-followup):
|
||||
/// `legal_actions` lists directly. Priority order (p2-67 follow-up
|
||||
/// extension):
|
||||
///
|
||||
/// 1. Any unit-level `FoundCity` (founder ready).
|
||||
/// 2. Any city-level `QueueProduction { item: "dwarf_warrior" }`
|
||||
/// (preferred over generic items so the transcript hits the
|
||||
/// "AI builds a unit by turn 10" constraint shape).
|
||||
/// 3. Any other city-level `QueueProduction` entry.
|
||||
/// 4. Any unit-level `Fortify` (defensive posture).
|
||||
/// 5. Empire-level `EndTurn` fallback.
|
||||
/// 1. Unit-level `FoundCity` (founder ready).
|
||||
/// 2. Pending `Promote` from `view.pending_events.promotion_picks`.
|
||||
/// Inert today: projector hard-codes
|
||||
/// `pending_events: PendingEventsView::default()`. This branch
|
||||
/// activates automatically when that projector gap closes.
|
||||
/// TRACKED: `p2-67-followup-legal-actions`.
|
||||
/// 3. Empire-level `ResearchTech`. Inert today: projector does NOT
|
||||
/// enumerate `ResearchTech` (no `TechWeb` handle on `GameState`).
|
||||
/// The branch dispatches if any entry surfaces — the dispatcher
|
||||
/// accepts arbitrary tech ids via `set_researching_unchecked`.
|
||||
/// TRACKED: `p2-67-followup-legal-actions`.
|
||||
/// 4. Unit-level `Attack`, preferring the defender with the lowest
|
||||
/// visible HP (resolved by cross-referencing `target` hex against
|
||||
/// `view.units` for an enemy occupant). Falls back to first legal
|
||||
/// `Attack` if no enemy unit is visible at the target hex.
|
||||
/// 5. City-level `QueueProduction` for a building, preferred per
|
||||
/// `BUILDING_PREFERENCE`. MUST sit above the unit branch — the
|
||||
/// bench `CityState` is single-slot, so once the city queues a
|
||||
/// warrior the queue is non-empty and no further
|
||||
/// `QueueProduction` surfaces until the warrior completes. Without
|
||||
/// this ordering the building branch would be permanently
|
||||
/// shadowed.
|
||||
/// 6. City-level `QueueProduction` for `dwarf_warrior`, then any
|
||||
/// other unit.
|
||||
/// 7. Unit-level `Move` (drives constraint 4 movement).
|
||||
/// 8. Unit-level `Fortify` (defensive posture).
|
||||
/// 9. Empire-level `EndTurn` fallback.
|
||||
///
|
||||
/// Entries already attempted on this turn (via `blacklist`) are skipped
|
||||
/// so a no-op-but-Ok dispatch (e.g. founder city founded synchronously
|
||||
/// but `Event::CityFounded` deferred to EndTurn) doesn't loop forever.
|
||||
fn pick_claude_action(view: &PlayerView, blacklist: &HashSet<String>) -> PlayerAction {
|
||||
// Own units / cities are already filter-narrowed: the projector
|
||||
// emits `legal_actions` only on entities owned by the bound player.
|
||||
// We still walk `view.units` / `view.cities` to find entries, but
|
||||
// ownership is implicit in `legal_actions.is_empty()`.
|
||||
|
||||
// Priority 1 — FoundCity from any unit's legal_actions list.
|
||||
for unit in &view.units {
|
||||
for entry in &unit.legal_actions {
|
||||
|
|
@ -145,11 +211,95 @@ fn pick_claude_action(view: &PlayerView, blacklist: &HashSet<String>) -> PlayerA
|
|||
}
|
||||
}
|
||||
|
||||
// Priority 2 — QueueProduction(dwarf_warrior) from any city.
|
||||
// Priority 2 — Promote from pending_events.promotion_picks. The
|
||||
// bench projector emits an empty `PendingEventsView`; this branch
|
||||
// is wired in advance of the projector gap closing. When
|
||||
// promotion_picks is populated, we synthesise a `Promote` action
|
||||
// with promotion id `"shock"` — the dispatcher's `apply_promote`
|
||||
// accepts any string and emits `Event::UnitPromoted { promotion }`
|
||||
// verbatim.
|
||||
for unit_id in &view.pending_events.promotion_picks {
|
||||
let action = PlayerAction::Promote(PromotionPick {
|
||||
unit_id: unit_id.clone(),
|
||||
promotion_id: "shock".to_string(),
|
||||
});
|
||||
let sig = action_signature(&action);
|
||||
if !blacklist.contains(&sig) {
|
||||
return action;
|
||||
}
|
||||
}
|
||||
|
||||
// Priority 3 — ResearchTech from empire legal_actions. The
|
||||
// projector does not currently emit these (see
|
||||
// `project_empire_legal_actions` doc-comment), so this branch is
|
||||
// dead today. When the projector grows a `TechWeb` handle and
|
||||
// surfaces `ResearchTech` entries, this fires automatically.
|
||||
for entry in &view.legal_actions {
|
||||
if let PlayerAction::ResearchTech { .. } = &entry.action {
|
||||
let sig = action_signature(&entry.action);
|
||||
if !blacklist.contains(&sig) {
|
||||
return entry.action.clone();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Priority 4 — Attack the lowest-HP visible enemy unit. Scan every
|
||||
// own-unit's `Attack` entries, resolve `target` → enemy unit's HP
|
||||
// via `view.units`, pick min. Ties broken by iteration order
|
||||
// (deterministic across runs because the projector iterates units
|
||||
// in `PlayerState.units` Vec order).
|
||||
let bound_player = view.player;
|
||||
let mut best_attack: Option<(i32, PlayerAction)> = None;
|
||||
for unit in &view.units {
|
||||
for entry in &unit.legal_actions {
|
||||
if let PlayerAction::Attack { target, .. } = &entry.action {
|
||||
let sig = action_signature(&entry.action);
|
||||
if blacklist.contains(&sig) {
|
||||
continue;
|
||||
}
|
||||
let target_pos = *target;
|
||||
let target_hp = view
|
||||
.units
|
||||
.iter()
|
||||
.find(|u| u.position == target_pos && u.owner != bound_player)
|
||||
.map(|u| u.hp)
|
||||
.unwrap_or(i32::MAX);
|
||||
match &best_attack {
|
||||
None => best_attack = Some((target_hp, entry.action.clone())),
|
||||
Some((cur, _)) if target_hp < *cur => {
|
||||
best_attack = Some((target_hp, entry.action.clone()))
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if let Some((_, action)) = best_attack {
|
||||
return action;
|
||||
}
|
||||
|
||||
// Priority 5 — QueueProduction for a building. Must precede unit
|
||||
// production: single-slot city queue means once a warrior queues,
|
||||
// no further `QueueProduction` surfaces until completion. Pick by
|
||||
// BUILDING_PREFERENCE, fall through to first legal building.
|
||||
for preferred in BUILDING_PREFERENCE {
|
||||
for city in &view.cities {
|
||||
for entry in &city.legal_actions {
|
||||
if let PlayerAction::QueueProduction { item, .. } = &entry.action {
|
||||
if item == preferred && is_building_id(item) {
|
||||
let sig = action_signature(&entry.action);
|
||||
if !blacklist.contains(&sig) {
|
||||
return entry.action.clone();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for city in &view.cities {
|
||||
for entry in &city.legal_actions {
|
||||
if let PlayerAction::QueueProduction { item, .. } = &entry.action {
|
||||
if item == "dwarf_warrior" {
|
||||
if is_building_id(item) {
|
||||
let sig = action_signature(&entry.action);
|
||||
if !blacklist.contains(&sig) {
|
||||
return entry.action.clone();
|
||||
|
|
@ -159,19 +309,35 @@ fn pick_claude_action(view: &PlayerView, blacklist: &HashSet<String>) -> PlayerA
|
|||
}
|
||||
}
|
||||
|
||||
// Priority 3 — any other QueueProduction.
|
||||
// Priority 6a — QueueProduction(dwarf_warrior) from any city.
|
||||
for city in &view.cities {
|
||||
for entry in &city.legal_actions {
|
||||
if let PlayerAction::QueueProduction { .. } = &entry.action {
|
||||
let sig = action_signature(&entry.action);
|
||||
if !blacklist.contains(&sig) {
|
||||
return entry.action.clone();
|
||||
if let PlayerAction::QueueProduction { item, .. } = &entry.action {
|
||||
if item == "dwarf_warrior" && !is_building_id(item) {
|
||||
let sig = action_signature(&entry.action);
|
||||
if !blacklist.contains(&sig) {
|
||||
return entry.action.clone();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Priority 4a — Move any unit (drives the constraint-4 unit-move
|
||||
// Priority 6b — any other unit QueueProduction.
|
||||
for city in &view.cities {
|
||||
for entry in &city.legal_actions {
|
||||
if let PlayerAction::QueueProduction { item, .. } = &entry.action {
|
||||
if !is_building_id(item) {
|
||||
let sig = action_signature(&entry.action);
|
||||
if !blacklist.contains(&sig) {
|
||||
return entry.action.clone();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Priority 7 — Move any unit (drives the constraint-4 unit-move
|
||||
// requirement). Walk units in order and pick the first non-blacklisted
|
||||
// Move entry. The projector emits all 6 in-bounds/biome-passable
|
||||
// neighbours; on a grid-less bench state it emits all 6 raw
|
||||
|
|
@ -188,7 +354,7 @@ fn pick_claude_action(view: &PlayerView, blacklist: &HashSet<String>) -> PlayerA
|
|||
}
|
||||
}
|
||||
|
||||
// Priority 4b — Fortify from any unit's legal_actions list.
|
||||
// Priority 8 — Fortify from any unit's legal_actions list.
|
||||
for unit in &view.units {
|
||||
for entry in &unit.legal_actions {
|
||||
if let PlayerAction::Fortify { .. } = &entry.action {
|
||||
|
|
@ -200,7 +366,7 @@ fn pick_claude_action(view: &PlayerView, blacklist: &HashSet<String>) -> PlayerA
|
|||
}
|
||||
}
|
||||
|
||||
// Priority 5 — End turn.
|
||||
// Priority 9 — End turn.
|
||||
PlayerAction::EndTurn
|
||||
}
|
||||
|
||||
|
|
@ -236,7 +402,7 @@ pub enum DriveOutcome {
|
|||
/// Drive one full game from a freshly-built state. Writes the JSONL
|
||||
/// transcript and per-turn snapshot files into `out_dir`. Returns the
|
||||
/// in-memory turn summaries for the recap + the terminating outcome.
|
||||
fn drive_game(out_dir: &Path) -> (Vec<TurnSummary>, DriveOutcome) {
|
||||
fn drive_game(out_dir: &Path, max_turns: u32) -> (Vec<TurnSummary>, DriveOutcome) {
|
||||
fs::create_dir_all(out_dir).expect("create out dir");
|
||||
let transcript_path = out_dir.join("transcript.jsonl");
|
||||
let mut transcript = fs::File::create(&transcript_path).expect("create transcript");
|
||||
|
|
@ -250,7 +416,7 @@ fn drive_game(out_dir: &Path) -> (Vec<TurnSummary>, DriveOutcome) {
|
|||
let mut consecutive_endturn_only = 0u32;
|
||||
let mut outcome = DriveOutcome::Completed;
|
||||
|
||||
'game: for turn in 0..MAX_TURNS {
|
||||
'game: for turn in 0..max_turns {
|
||||
eprintln!("[turn] starting turn {turn}");
|
||||
// Snapshot at start of turn for designated boundaries.
|
||||
if SNAPSHOT_TURNS.contains(&turn) {
|
||||
|
|
@ -693,14 +859,14 @@ fn claude_vs_ai_full_game_transcript() {
|
|||
let out_dir = demo_run_dir();
|
||||
// Wipe any prior run so the artifact tree is fresh.
|
||||
let _ = fs::remove_dir_all(&out_dir);
|
||||
let (summaries, outcome) = drive_game(&out_dir);
|
||||
let (summaries, outcome) = drive_game(&out_dir, MAX_TURNS);
|
||||
write_recap(&out_dir, &summaries, &outcome);
|
||||
|
||||
// ── Hard constraint 1: byte-identical transcript across two runs ────
|
||||
// Re-run into a sibling directory and diff the transcript.jsonl files.
|
||||
let determinism_dir = out_dir.with_file_name("2026-05-12-claude-vs-ai-mock-determinism");
|
||||
let _ = fs::remove_dir_all(&determinism_dir);
|
||||
let (_summaries2, outcome2) = drive_game(&determinism_dir);
|
||||
let (_summaries2, outcome2) = drive_game(&determinism_dir, MAX_TURNS);
|
||||
assert_eq!(
|
||||
outcome, outcome2,
|
||||
"drive_game outcome differs between runs: a={outcome:?} b={outcome2:?}"
|
||||
|
|
@ -843,4 +1009,254 @@ fn claude_vs_ai_full_game_transcript() {
|
|||
summaries.len(),
|
||||
4 * summaries.len()
|
||||
);
|
||||
|
||||
// ── Hard constraint 5 (post-extension): queue_building: fires ≥ 1 ───
|
||||
// Building branch sits above the unit branch in `pick_claude_action`.
|
||||
// The single-slot bench city queue empties after each completed
|
||||
// item, so over a 25-turn horizon the building branch MUST fire at
|
||||
// least once. If this regresses, the policy ordering is wrong (the
|
||||
// unit branch is shadowing builds) or the projector stopped
|
||||
// emitting building-id `QueueProduction` entries.
|
||||
let signatures: Vec<String> = summaries
|
||||
.iter()
|
||||
.flat_map(|s| s.claude_actions.iter().map(|d| action_signature(&d.action)))
|
||||
.collect();
|
||||
let any_building = signatures.iter().any(|s| s.starts_with("queue_building:"));
|
||||
assert!(
|
||||
any_building,
|
||||
"expected at least one queue_building: action across {} turns; \
|
||||
observed signatures: {:?}",
|
||||
summaries.len(),
|
||||
signatures
|
||||
);
|
||||
// Note: research_tech: and promote: are intentionally NOT asserted
|
||||
// — both branches sit behind documented projector gaps
|
||||
// (`PendingEventsView::default()` and the missing `TechWeb` handle
|
||||
// on `GameState`). They surface in the recap's action-signature
|
||||
// table when the gaps close; today they will not appear.
|
||||
}
|
||||
|
||||
/// Long-horizon transcript — 250-turn ceiling, written under
/// `.local/demo-runs/2026-05-12-claude-vs-easy-ai-250-turn/`. Gated
/// behind `#[ignore]` so the default test suite stays at 25 turns;
/// run via `cargo test ... -- --ignored long_game_transcript --nocapture`.
///
/// Hard constraints relaxed compared to the 25-turn variant — we do NOT
/// require determinism here (one run only), and we do not assert
/// constraint shapes. The deliverable is the transcript + recap. The
/// PvP combat overflow at `mc-turn/src/processor.rs:2425` may terminate
/// the run early; when it does, `DriveOutcome::EndTurnPanic` carries
/// the turn and message and the recap surfaces both.
#[test]
#[ignore = "long-horizon transcript; run via --ignored"]
fn long_game_transcript() {
    let out_dir = long_demo_run_dir();
    // Wipe any prior run; a missing directory is not an error here.
    let _ = fs::remove_dir_all(&out_dir);
    let (summaries, outcome) = drive_game(&out_dir, LONG_GAME_MAX_TURNS);
    write_long_recap(&out_dir, &summaries, &outcome);

    // Minimal sanity: the run produced at least one turn of summaries
    // and the transcript file exists.
    assert!(
        !summaries.is_empty(),
        "long-game run produced zero turns — drive_game aborted before turn 0"
    );
    let transcript_path = out_dir.join("transcript.jsonl");
    assert!(
        transcript_path.exists(),
        "long-game transcript missing at {}",
        transcript_path.display()
    );

    eprintln!(
        "[long-game] turns played = {}, outcome = {:?}, artifact dir = {}",
        summaries.len(),
        outcome,
        out_dir.display()
    );
}
|
||||
|
||||
/// Long-game artifact dir.
|
||||
fn long_demo_run_dir() -> PathBuf {
|
||||
let crate_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
||||
let repo_root = crate_dir
|
||||
.parent()
|
||||
.and_then(|p| p.parent())
|
||||
.and_then(|p| p.parent())
|
||||
.and_then(|p| p.parent())
|
||||
.expect("repo root resolves")
|
||||
.to_path_buf();
|
||||
repo_root.join(".local/demo-runs/2026-05-12-claude-vs-easy-ai-250-turn")
|
||||
}
|
||||
|
||||
/// Long-game recap: per-25-turn checkpoints, total counts, victory
|
||||
/// outcome, action-signature frequency table. Distinct from the 25-turn
|
||||
/// recap so the output stays focused on aggregate metrics rather than
|
||||
/// per-turn detail (which would blow out at 250 turns).
|
||||
fn write_long_recap(out_dir: &Path, summaries: &[TurnSummary], outcome: &DriveOutcome) {
|
||||
use std::collections::BTreeMap;
|
||||
let path = out_dir.join("recap.md");
|
||||
let mut md = String::new();
|
||||
md.push_str("# Claude-vs-AI Long-Horizon Transcript — 2026-05-12\n\n");
|
||||
md.push_str("**Source**: `mc-player-api/tests/full_game_transcript.rs::long_game_transcript`\n\n");
|
||||
md.push_str("**Horizon**: 250 turns (ceiling)\n\n");
|
||||
md.push_str(&format!("**Turns played**: {}\n\n", summaries.len()));
|
||||
md.push_str(&format!("**Termination**: {:?}\n\n", outcome));
|
||||
|
||||
if let DriveOutcome::EndTurnPanic { turn, message } = outcome {
|
||||
md.push_str(&format!(
|
||||
"> **mc-turn panic at turn {turn}**: `{message}`. The PvP \
|
||||
combat-resolution multiply-overflow at \
|
||||
`mc-turn/src/processor.rs:2425` is a known residual gap; \
|
||||
the transcript captures every line up to and including the \
|
||||
synthetic `protocol_error` notification emitted at abort.\n\n"
|
||||
));
|
||||
}
|
||||
|
||||
// Projector-gap acknowledgements.
|
||||
md.push_str("## Projector gaps (acknowledged, not fixed in this pass)\n\n");
|
||||
md.push_str(
|
||||
"- `ResearchTech` is NOT in `view.legal_actions`. Empire projector lacks a \
|
||||
`TechWeb` handle on `GameState`. Policy branch wired but inert. \
|
||||
TRACKED: `p2-67-followup-legal-actions`.\n",
|
||||
);
|
||||
md.push_str(
|
||||
"- `Promote` is NOT in `view.units[*].legal_actions`. Source is \
|
||||
`view.pending_events.promotion_picks`, which the projector \
|
||||
hard-codes to `PendingEventsView::default()`. Policy branch wired \
|
||||
but inert. TRACKED: `p2-67-followup-legal-actions`.\n",
|
||||
);
|
||||
md.push_str(
|
||||
"- `Attack` DOES surface from `view.units[*].legal_actions` for \
|
||||
enemy-occupied adjacent hexes — see action-signature table below \
|
||||
for whether it actually fired in this run.\n\n",
|
||||
);
|
||||
|
||||
// Per-25-turn checkpoints.
|
||||
md.push_str("## Per-25-turn checkpoints (slot 0 = Claude)\n\n");
|
||||
md.push_str("| turn | gold | cities | units | score_estimate |\n");
|
||||
md.push_str("|------|------|--------|-------|----------------|\n");
|
||||
let checkpoint_turns: Vec<u32> = (0..=LONG_GAME_MAX_TURNS).step_by(25).collect();
|
||||
for ct in &checkpoint_turns {
|
||||
if let Some(summary) = summaries.iter().find(|s| s.turn == *ct) {
|
||||
if let Some((_, gold, cities, units)) =
|
||||
summary.score_snapshot.iter().find(|(slot, _, _, _)| *slot == 0)
|
||||
{
|
||||
md.push_str(&format!(
|
||||
"| {} | {} | {} | {} | n/a |\n",
|
||||
ct, gold, cities, units
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
md.push_str("\n");
|
||||
|
||||
// Total counts over the full run.
|
||||
let mut techs = 0u32;
|
||||
let mut buildings_done = 0u32;
|
||||
let mut attacks_seen = 0u32;
|
||||
let mut units_killed = 0u32;
|
||||
let mut cities_founded = 0u32;
|
||||
for s in summaries {
|
||||
for d in &s.claude_actions {
|
||||
if matches!(d.action, PlayerAction::Attack { .. }) {
|
||||
attacks_seen += 1;
|
||||
}
|
||||
}
|
||||
for e in s.endturn_events.iter().chain(
|
||||
s.claude_actions.iter().flat_map(|d| d.events.iter()),
|
||||
) {
|
||||
match e {
|
||||
Event::TechResearched { .. } => techs += 1,
|
||||
Event::CityBuildingCompleted { .. } => buildings_done += 1,
|
||||
Event::UnitDestroyed { .. } => units_killed += 1,
|
||||
Event::CityFounded { .. } => cities_founded += 1,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
md.push_str("## Total counts over the full run\n\n");
|
||||
md.push_str(&format!("- Techs researched: {}\n", techs));
|
||||
md.push_str(&format!("- Buildings completed (`CityBuildingCompleted`): {}\n", buildings_done));
|
||||
md.push_str(&format!("- Attacks dispatched by Claude (`PlayerAction::Attack`): {}\n", attacks_seen));
|
||||
md.push_str(&format!("- Units killed (`UnitDestroyed` events): {}\n", units_killed));
|
||||
md.push_str(&format!("- Cities founded (`CityFounded` events): {}\n\n", cities_founded));
|
||||
|
||||
// Victory outcome.
|
||||
md.push_str("## Victory outcome\n\n");
|
||||
let game_over_event: Option<&Event> = summaries
|
||||
.iter()
|
||||
.flat_map(|s| s.endturn_events.iter())
|
||||
.find(|e| matches!(e, Event::GameOver { .. }));
|
||||
match game_over_event {
|
||||
Some(Event::GameOver { winner, victory_type }) => {
|
||||
md.push_str(&format!(
|
||||
"- `Event::GameOver` fired: winner={}, victory_type={}\n\n",
|
||||
winner, victory_type
|
||||
));
|
||||
}
|
||||
_ => {
|
||||
md.push_str(&format!(
|
||||
"- No `Event::GameOver` fired. Final turn = {}, outcome = `{:?}`.\n\n",
|
||||
summaries.last().map(|s| s.turn).unwrap_or(0),
|
||||
outcome
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// Final-score table (all slots).
|
||||
md.push_str("## Final scores (all slots)\n\n");
|
||||
md.push_str("| slot | gold | cities | units |\n");
|
||||
md.push_str("|------|------|--------|-------|\n");
|
||||
if let Some(last) = summaries.last() {
|
||||
for (slot, gold, cities, units) in &last.score_snapshot {
|
||||
md.push_str(&format!(
|
||||
"| {} | {} | {} | {} |\n",
|
||||
slot, gold, cities, units
|
||||
));
|
||||
}
|
||||
}
|
||||
md.push_str("\n");
|
||||
|
||||
// Action-signature frequency table.
|
||||
md.push_str("## Action-signature frequency\n\n");
|
||||
let mut freq: BTreeMap<String, u32> = BTreeMap::new();
|
||||
for s in summaries {
|
||||
for d in &s.claude_actions {
|
||||
// Bucket by the head of the signature (before first `:`) so the
|
||||
// table aggregates `queue_building:c_0_0:library` and
|
||||
// `queue_building:c_0_0:granary` under the same bucket.
|
||||
let sig = action_signature(&d.action);
|
||||
let head = sig.split(':').next().unwrap_or(&sig).to_string();
|
||||
*freq.entry(head).or_insert(0) += 1;
|
||||
}
|
||||
}
|
||||
md.push_str("| action | count |\n|--------|-------|\n");
|
||||
for (k, v) in &freq {
|
||||
md.push_str(&format!("| `{}` | {} |\n", k, v));
|
||||
}
|
||||
md.push_str("\n");
|
||||
|
||||
// Confirm-by-absence note for the wired-but-inert branches.
|
||||
md.push_str("## Projector-gap confirmation by absence\n\n");
|
||||
let saw_research = freq.get("research_tech").copied().unwrap_or(0) > 0;
|
||||
let saw_promote = freq.get("promote").copied().unwrap_or(0) > 0;
|
||||
md.push_str(&format!(
|
||||
"- `research_tech:` observed: **{}** (expected: false — projector gap)\n",
|
||||
saw_research
|
||||
));
|
||||
md.push_str(&format!(
|
||||
"- `promote:` observed: **{}** (expected: false — projector gap)\n",
|
||||
saw_promote
|
||||
));
|
||||
if saw_research || saw_promote {
|
||||
md.push_str(
|
||||
"\n> A wired-but-inert branch fired. Projector gap may have closed — \
|
||||
re-verify `view.research.current_tech` / `view.pending_events.promotion_picks` \
|
||||
population.\n",
|
||||
);
|
||||
}
|
||||
md.push_str("\n");
|
||||
|
||||
fs::write(&path, md).expect("write long recap");
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue