#!/usr/bin/env bash
# huge-map-5clan.sh — THE "ultimate test". 5 AI clan personalities compete
# on a map sized for 8 players, stressing the AI lookahead pipeline
# end-to-end.
#
# Per project owner: this test should only run AFTER the 1v1 matchup grid
# (`tools/matchup-grid.sh`) has shown clans are balanced in head-to-head
# play.
#
# The map size is selected with MAP_SIZE (default "standard"; "huge" is the
# stretch goal — see the note next to the defaults). The name matches the id
# in setup.json; dimensions and max_players are read from the data file. If
# that data file's id changes capacity, this harness picks it up
# automatically.
#
# Acceptance criteria (validated via `checklist-report.py ultimate_stress`):
#   - All 5 clans appear in at least one of the SEEDS runs
#   - Victory rate ≥ SEEDS/2 (games decisive — MCTS not stalling)
#   - Winner distribution non-degenerate: ≥2 distinct clans win across grid
#   - Median game length ≥ TURN_LIMIT*0.4
#
# Usage:
#   tools/huge-map-5clan.sh              # defaults SEEDS=10 TURN_LIMIT=500 PARALLEL=4
#   SEEDS=20 tools/huge-map-5clan.sh
#   tools/huge-map-5clan.sh --help
#
# Environment (all optional):
#   SEEDS, TURN_LIMIT, PARALLEL      batch shape (defaults 10 / 500 / 4)
#   MAP_SIZE, NUM_PLAYERS            map id and AI count (defaults standard / 5)
#   MCTS_DECISION_BUDGET_MS          per-decision MCTS cap in ms (default 2000)
#   SAFETY_TIMEOUT_OVERRIDE          per-game timeout in seconds
#                                    (default TURN_LIMIT * 45 + 600)
#   HUGE_OUTPUT                      overrides the output directory
#   SKIP_SERVICE_UP=1                do not run `services:up` before the batch
#   MCTS_SOCKET_PATH, MCTS_TELEMETRY_PATH
#                                    warm MCTS service socket / telemetry file
#
# Output layout:
#   .local/iter/huge-map-5clan-<STAMP>/
#     game_<N>_seed<SEED>/    (SEEDS games, 5 AI clans each)
#     batch.log               (stdout+stderr of autoplay-batch.sh)
#     verdict.json
#     gate.stderr             (stderr of the ultimate_stress gate)
#     completion.marker

# No `-e`: the batch and gate exit codes are captured explicitly further down.
set -uo pipefail

# ANSI colour escapes; they are interpreted by `echo -e` at the call sites.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
DIM='\033[2m'
NC='\033[0m'

# Run-shape defaults; each one can be overridden from the environment.
: "${SEEDS:=10}"
: "${TURN_LIMIT:=500}"
: "${PARALLEL:=4}"

# Civ5 "Standard" = 80×52, max 8 players — the smallest map that fits the
# user's "huge map that 8 COULD play on" intent. Our own "huge" (128×80,
# 12-player) is stretch-goal; switch to MAP_SIZE=huge once POD's
# MAX_PLAYERS=4 limit is lifted and the game supports >8 AI slots.
: "${MAP_SIZE:=standard}"
: "${NUM_PLAYERS:=5}"

# p1-22: bound MCTS per-decision wall-clock cost. 2000 ms caps each AI
# decision. Empirically (cycle 57, 2026-05-07): 5-player MCTS on a standard
# map runs ~34s/turn wall-clock, so T=300 needs ~10,200s + 25% buffer ≈ 12,750s.
# autoplay-batch.sh's default formula (TURN_LIMIT * 3 + 300 = 1200s for T=300)
# is calibrated for 2-player smoke — it is far too short here and killed all
# 10 cycle-57 games at T32-41 (exit code 124). We set SAFETY_TIMEOUT_OVERRIDE
# to TURN_LIMIT * 45 + 600 (14,100s for T=300, ~3.9h) so the per-game `timeout`
# guard in autoplay-batch.sh is appropriate for 5-clan huge-map runs.
# This value can be overridden via env if needed.
: "${MCTS_DECISION_BUDGET_MS:=2000}"

# TURN_LIMIT feeds shell arithmetic below; reject anything that is not a plain
# non-negative integer so a typo fails here with a clear message.
case "${TURN_LIMIT:-}" in
  '' | *[!0-9]*)
    echo "TURN_LIMIT must be a non-negative integer (got: '${TURN_LIMIT:-}')" >&2
    exit 2
    ;;
esac

# Per-game safety timeout for autoplay-batch.sh (seconds).
# Formula: TURN_LIMIT * 45 + 600 (empirically derived — see comment above).
: "${SAFETY_TIMEOUT_OVERRIDE:=$(( TURN_LIMIT * 45 + 600 ))}"
export SAFETY_TIMEOUT_OVERRIDE

# Argument handling: only --help/-h is accepted; anything else exits 2.
for arg in "$@"; do
  case "$arg" in
    --help | -h)
      # Print only the leading header comment block (everything after the
      # shebang up to the first non-comment line), stripping the "# " prefix.
      awk '
        NR == 1 && /^#!/ { next }
        /^#/             { sub(/^# ?/, ""); print; next }
                         { exit }
      ' "$0"
      exit 0
      ;;
    *)
      echo "Unknown argument: $arg" >&2
      exit 2
      ;;
  esac
done

# Repository root = parent of the directory that holds this script.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" || {
  echo "ERROR: cannot resolve repository root from ${BASH_SOURCE[0]}" >&2
  exit 1
}
STAMP="$(date +%Y%m%d_%H%M%S)"
# HUGE_OUTPUT overrides the output dir (used by apricot-run.sh).
PARENT="${HUGE_OUTPUT:-$REPO_ROOT/.local/iter/huge-map-5clan-$STAMP}"
# Everything below writes into $PARENT, so stop now if it cannot be created.
mkdir -p -- "$PARENT" || {
  echo "ERROR: cannot create output directory: $PARENT" >&2
  exit 1
}

# p1-27a — bring the warm MCTS service up before the run so per-AI-turn
# wall-clock benefits from GPU init + warm cache amortisation. `services:up`
# is idempotent — safe to call when the service is already running. Export
# MCTS_SOCKET_PATH so the in-process gdext bridge (api-gdext/src/ai.rs)
# prefers the warm socket over its fallback in-process path.
# Telemetry lands in $PARENT/mcts-service.jsonl so the run's per-AI-turn
# latency measurements live alongside the autoplay logs.
: "${MCTS_SOCKET_PATH:=/tmp/mc-mcts.sock}"
: "${MCTS_TELEMETRY_PATH:=$PARENT/mcts-service.jsonl}"
export MCTS_SOCKET_PATH MCTS_TELEMETRY_PATH
# Best-effort: a failed services:up only warns; the run continues without it.
if [ "${SKIP_SERVICE_UP:-0}" != "1" ]; then
  bash "$REPO_ROOT/tools/run-services.sh" services:up || {
    echo -e "${YELLOW}WARN: services:up failed — continuing without warm MCTS service.${NC}" >&2
  }
fi

# Preflight: check for a passing matchup-grid within the last 30 days.
# Pick the newest matchup-grid run directory by mtime. A glob plus `-nt` is
# used instead of parsing `ls` output so unusual path characters are safe.
# The preflight is advisory: every branch only prints, none aborts the run.
LATEST_MATCHUP_GRID=""
for grid_dir in "$REPO_ROOT"/.local/iter/matchup-grid-*/; do
  [ -d "$grid_dir" ] || continue   # unmatched glob stays literal; skip it
  if [ -z "$LATEST_MATCHUP_GRID" ] || [ "$grid_dir" -nt "$LATEST_MATCHUP_GRID" ]; then
    LATEST_MATCHUP_GRID="$grid_dir"
  fi
done

if [ -z "$LATEST_MATCHUP_GRID" ]; then
  echo -e "${YELLOW}WARN: no matchup-grid run found.${NC}"
  echo -e "${DIM}Per project owner, 1v1 matchup balance should pass before running the ultimate test.${NC}"
  echo -e "${DIM}Run: tools/matchup-grid.sh${NC}"
  echo ""
else
  # Flag a stale prerequisite: the run directory was last modified more than
  # 30 days ago. `-prune` makes find test only the directory itself.
  if [ -n "$(find "$LATEST_MATCHUP_GRID" -prune -mtime +30 -print 2>/dev/null)" ]; then
    echo -e "${YELLOW}WARN: most recent matchup-grid run is older than 30 days.${NC}"
    echo -e "${DIM}$LATEST_MATCHUP_GRID${NC}"
  fi

  matchup_verdict="$LATEST_MATCHUP_GRID/verdict.json"
  if [ -f "$matchup_verdict" ] && command -v python3 >/dev/null; then
    # The path is passed as argv, not spliced into the Python source, so a
    # quote in the path cannot break or alter the program. Any failure
    # (unreadable file, bad JSON, non-object top level) counts as not passing.
    pass=$(
      python3 -c 'import json, sys; print(json.load(open(sys.argv[1])).get("pass", False))' \
        "$matchup_verdict" 2>/dev/null || echo False
    )
    if [ "$pass" = "True" ]; then
      echo -e "${GREEN}prereq: matchup-grid verdict PASS${NC} ($LATEST_MATCHUP_GRID)"
    else
      echo -e "${YELLOW}WARN: most recent matchup-grid verdict is NOT passing.${NC}"
      echo -e "${DIM}$matchup_verdict${NC}"
    fi
  else
    echo -e "${YELLOW}WARN: cannot check matchup-grid verdict (verdict.json missing or python3 unavailable).${NC}"
    echo -e "${DIM}$matchup_verdict${NC}"
  fi
fi

echo -e "${BLUE}huge-map-5clan (ultimate stress)${NC} — ${SEEDS} seeds × T${TURN_LIMIT} × ${NUM_PLAYERS} AI on ${MAP_SIZE} map"
echo -e "${DIM}parent: $PARENT${NC}"

# Start with an empty marker; exit codes and a finish stamp are appended later.
MARKER="$PARENT/completion.marker"
: > "$MARKER"

# Pin all 5 slots to the 5 canonical clans so meta.player_clans is fully
# populated for the ultimate_stress verdict. Without per-slot pinning, slot 0
# (the human slot) gets empty clan_id and its wins can't be attributed.
# Seed-driven map variation still drives strategic divergence.
# NOTE(review): exactly five slots (P0..P4) are pinned here regardless of
# NUM_PLAYERS — confirm the behaviour when NUM_PLAYERS is overridden.
MAP_SIZE="$MAP_SIZE" \
NUM_PLAYERS="$NUM_PLAYERS" \
PARALLEL="$PARALLEL" \
MCTS_DECISION_BUDGET_MS="${MCTS_DECISION_BUDGET_MS:-2000}" \
SAFETY_TIMEOUT_OVERRIDE="${SAFETY_TIMEOUT_OVERRIDE:-}" \
AI_USE_MCTS=true \
AI_PIN_PERSONALITY_P0=ironhold \
AI_PIN_PERSONALITY_P1=blackhammer \
AI_PIN_PERSONALITY_P2=goldvein \
AI_PIN_PERSONALITY_P3=deepforge \
AI_PIN_PERSONALITY_P4=runesmith \
  bash "$REPO_ROOT/tools/autoplay-batch.sh" "$SEEDS" "$TURN_LIMIT" "$PARENT" \
  > "$PARENT/batch.log" 2>&1
# Captured immediately so no later command can overwrite $?.
batch_rc=$?
# Record the batch exit code in the marker, and tell the operator when it is
# non-zero. The gate below still runs either way.
printf 'batch_exit=%d\n' "$batch_rc" >> "$MARKER"
if [ "$batch_rc" -ne 0 ]; then
  echo -e "${YELLOW}WARN: autoplay-batch.sh exited with status $batch_rc — see $PARENT/batch.log${NC}" >&2
fi

# Run the ultimate_stress gate: its stdout becomes verdict.json, its stderr is
# kept in gate.stderr, and its exit code decides PASS/FAIL.
echo -e "${BLUE}computing ultimate_stress verdict…${NC}"
python3 "$REPO_ROOT/tools/checklist-report.py" ultimate_stress "$PARENT" \
  > "$PARENT/verdict.json" 2> "$PARENT/gate.stderr"
gate_rc=$?

# Finish the completion marker: gate exit code, UTC finish time, output dir.
printf 'gate_exit=%d\n' "$gate_rc" >> "$MARKER"
printf 'finished=%s\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" >> "$MARKER"
printf 'parent=%s\n' "$PARENT" >> "$MARKER"

if [ "$gate_rc" -eq 0 ]; then
  echo -e "${GREEN}ultimate_stress: PASS${NC}"
else
  echo -e "${RED}ultimate_stress: FAIL${NC} (gate_exit=$gate_rc)"
  echo -e "${DIM}see: $PARENT/verdict.json${NC}"
  echo -e "${DIM}gate stderr: $PARENT/gate.stderr${NC}"
fi

# The script's exit status is the gate's; the batch status lives in the marker.
exit "$gate_rc"