magicciv/tools/autoplay-batch.sh.bak_20260518
2026-05-26 02:21:13 -07:00

437 lines
18 KiB
Bash
Executable file
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
# autoplay-batch.sh — Run auto_play N times with different seeds and collect per-game output dirs.
#
# Usage: tools/autoplay-batch.sh [--weston] [count=3] [turn_limit=500] [results_dir]
#
# Output layout:
# <results_dir>/game_<stamp>_seed<N>/
# meta.json
# turn_stats.jsonl
# events.jsonl
# game.log
# weston.log (weston mode only)
# *.save (per-turn saves, if configured)
#
# Environment:
# AUTOPLAY_HOST — If set (e.g. "lilith@apricot.local"), run each game via SSH
# using run_ap3.sh on the remote host and scp results back.
# If unset, run locally via flatpak (Linux only).
# RENDER_MODE — "headless" (default) or "weston". --weston flag sets this.
# headless: Godot --headless, no display, no screenshots.
# weston: weston headless backend, software rendering, screenshots work.
# PARALLEL — Max seeds to run concurrently (default 1 = serial).
# Remote runner is concurrency-safe via scoped pkill per AUTO_PLAY_DIR.
# Apricot has 64 cores → PARALLEL=10 is a safe, ~10× wall-clock speedup.
# LAUNCH_COOLDOWN — Seconds to sleep between launching each parallel worker (default 0).
# Use 10-20 on memory-constrained hosts to stagger Godot init spikes
# so games never all peak-initialize simultaneously (prevents OOM crashes).
# SEED_OFFSET — Shift the seed range from [1..COUNT] to [1+OFFSET..COUNT+OFFSET]
# (default 0). Use for multi-sweep runs that share a parent
# results_dir and need disjoint seed numbers (e.g. Task #10
# AI_PIN_PERSONALITY rotation: 5× batches at offsets 0,10,20,30,40).
# REMOTE_BATCH_ROOT — Where remote autoplay batches write their output on the
# AUTOPLAY_HOST. Defaults to /tmp/@magic-civilization/builds
# so artifacts land OUTSIDE the remote host's dev working tree
# (writing into the working tree causes auto-commit divergence
# between hosts). Override to $REMOTE_HOME/... if the remote
# host is a pure build node without active development.
set -euo pipefail

#######################################
# Validate a decimal integer setting and print it in canonical base-10 form.
# Canonicalising strips leading zeros so that later $(( ... )) arithmetic
# cannot misread a value such as "010" as octal (or abort on "08").
# Arguments: $1 - setting name used in the error message
#            $2 - candidate value
#            $3 - smallest accepted value (0 => "non-negative", else "positive")
# Outputs:   normalised value on stdout; error message on stderr
# Returns:   0 if valid, 2 otherwise
#######################################
_checked_uint() {
  local label="$1" value="$2" minimum="$3"
  local kind="positive"
  if [ "$minimum" -eq 0 ]; then
    kind="non-negative"
  fi
  if ! [[ "$value" =~ ^[0-9]+$ ]] || [ "$value" -lt "$minimum" ]; then
    echo "ERROR: $label must be a $kind integer (got '$value')" >&2
    return 2
  fi
  # 10# forces base-10 interpretation regardless of leading zeros.
  printf '%s\n' "$(( 10#$value ))"
}

# Script location, project root (parent of the script dir) and Godot project.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
GAME_DIR="$PROJECT_DIR/src/game"
REPO_ROOT="$PROJECT_DIR"

RENDER_MODE="${RENDER_MODE:-headless}"

# --weston / --headless may appear anywhere on the command line and override
# the RENDER_MODE environment value (last flag wins). Everything else is kept,
# in order, as a positional argument.
POSITIONAL=()
for arg in "$@"; do
  case "$arg" in
    --weston) RENDER_MODE="weston" ;;
    --headless) RENDER_MODE="headless" ;;
    *) POSITIONAL+=("$arg") ;;
  esac
done
# The nested expansion keeps `set -u` from tripping on an empty array.
set -- "${POSITIONAL[@]+"${POSITIONAL[@]}"}"

# Positional arguments: [count=3] [turn_limit=500] [results_dir]
COUNT="$(_checked_uint count "${1:-3}" 1)" || exit 2
TURN_LIMIT="$(_checked_uint turn_limit "${2:-500}" 1)" || exit 2
RESULTS_DIR="${3:-$REPO_ROOT/.local/batches/autoplay_batch}"

AUTOPLAY_HOST="${AUTOPLAY_HOST:-}"
REMOTE_BATCH_ROOT="${REMOTE_BATCH_ROOT:-/tmp/@magic-civilization/builds}"

# Per-game safety timeout in seconds. SAFETY_TIMEOUT_OVERRIDE, when non-empty,
# is used verbatim (it is not validated here). Otherwise the default is
# derived from TURN_LIMIT: 2x + 300s, or 3x + 300s when AI_USE_MCTS=true or
# AI_GPU_ROLLOUT=true.
if [ -z "${SAFETY_TIMEOUT_OVERRIDE:-}" ]; then
  if [ "${AI_USE_MCTS:-}" = "true" ] || [ "${AI_GPU_ROLLOUT:-}" = "true" ]; then
    SAFETY_TIMEOUT=$(( TURN_LIMIT * 3 + 300 ))
  else
    SAFETY_TIMEOUT=$(( TURN_LIMIT * 2 + 300 ))
  fi
else
  SAFETY_TIMEOUT="$SAFETY_TIMEOUT_OVERRIDE"
fi

# LAUNCH_COOLDOWN is consumed best-effort by the dispatch loop and is not
# validated here.
LAUNCH_COOLDOWN="${LAUNCH_COOLDOWN:-0}"

# PARALLEL: maximum number of concurrent seeds (>= 1).
PARALLEL="$(_checked_uint PARALLEL "${PARALLEL:-1}" 1)" || exit 2

# SEED_OFFSET shifts the seed range from [1..COUNT] to [1+OFFSET..COUNT+OFFSET]
# so that sweeps sharing a parent results dir get disjoint seed numbers.
SEED_OFFSET="$(_checked_uint SEED_OFFSET "${SEED_OFFSET:-0}" 0)" || exit 2
#######################################
# Report whether an absolute, normalised path is /tmp (or /private/tmp)
# itself or anything beneath it.
# Arguments: $1 - absolute path
# Returns:   0 if the path is a tmp location, 1 otherwise
#######################################
_is_tmp_path() {
  case "$1" in
    /tmp | /tmp/* | /private/tmp | /private/tmp/*) return 0 ;;
    *) return 1 ;;
  esac
}

# Flatpak's sandboxed Godot resolves AUTO_PLAY_DIR against an unspecified CWD,
# not the caller's shell CWD — a relative path silently produces 0-byte
# meta.json / turn_stats.jsonl even when the game itself completes (game.log
# is fine because it's redirected host-side). realpath -m tolerates the path
# not existing yet; it is mkdir'd just below. Normalising first also lets the
# /tmp check see through forms such as ./tmp, ../tmp or a trailing slash.
RESULTS_DIR="$(realpath -m "$RESULTS_DIR")"

# Flatpak sandbox can't write to /tmp. Reject /tmp paths outright instead of
# silently redirecting — persistent output belongs under the repo. The bare
# directory (/tmp, which is what "/tmp/" normalises to) is rejected as well as
# anything below it.
if _is_tmp_path "$RESULTS_DIR"; then
  echo "ERROR: results_dir under /tmp is forbidden (wiped on reboot, flatpak sandbox hostile)." >&2
  echo " Use a path under the repo (default: <repo>/.local/batches/) or \$HOME/tmp." >&2
  exit 2
fi
mkdir -p "$RESULTS_DIR"

# One timestamp shared by every game dir / run id in this batch.
STAMP="$(date +%Y%m%d_%H%M%S)"
SEED_START=$(( SEED_OFFSET + 1 ))
SEED_END=$(( SEED_OFFSET + COUNT ))

echo "============================================================"
echo "Autoplay Batch: $COUNT games (seeds $SEED_START..$SEED_END), turn_limit=$TURN_LIMIT"
echo "Results: $RESULTS_DIR"
echo "Stamp: $STAMP"
if [ -n "$AUTOPLAY_HOST" ]; then
  echo "Mode: remote SSH ($AUTOPLAY_HOST)"
else
  echo "Mode: local flatpak"
fi
echo "Render: $RENDER_MODE"
echo "Parallel: $PARALLEL concurrent seed(s)"
echo "Safety timeout: ${SAFETY_TIMEOUT}s per game"
echo "============================================================"
# Backend preparation. Local and remote batches need different groundwork, so
# branch once on AUTOPLAY_HOST.
if [ -n "$AUTOPLAY_HOST" ]; then
  # Remote: look up the remote $HOME a single time, before any worker starts,
  # and export it so every (possibly parallel) worker sees the same value.
  # No rebuild or service start happens here; the remote host is expected to
  # be ready already.
  REMOTE_HOME="$(ssh "$AUTOPLAY_HOST" 'echo "$HOME"')"
  export REMOTE_HOME
else
  # p1-45: rebuild the GDExtension before every local batch so the built
  # library is never stale. The build runs with a CARGO_TARGET_DIR stamped
  # with the current epoch second; the variable is removed from the
  # environment again once the build returns.
  echo "--- p1-45: rebuilding GDExtension before batch (CARGO_TARGET_DIR isolation) ---"
  MC_BUILD_STAMP="$(date +%s)"
  export CARGO_TARGET_DIR="/tmp/mc-build-${MC_BUILD_STAMP}/target"
  (cd "$PROJECT_DIR/src/simulator" && bash build-gdext.sh)
  unset CARGO_TARGET_DIR
  echo "--- GDExtension rebuild complete ---"

  # Bring up the MCTS service. Its stderr is discarded and a failure is
  # ignored, so the batch proceeds either way (the original note describes
  # the call as a no-op when the service is already running).
  "$SCRIPT_DIR/run-services.sh" services:up 2>/dev/null || true
fi
#######################################
# Best-effort cleanup of leftover weston / Godot flatpak processes.
# Matches against full command lines (pkill -f); "nothing matched" and any
# pkill error output are ignored. Pauses briefly afterwards.
# Arguments: none
# Returns:   0
#######################################
_kill_stale_procs() {
  local pattern
  for pattern in "weston.*godot-headless" "org.godotengine.Godot"; do
    pkill -f "$pattern" 2>/dev/null || true
  done
  sleep 0.5
}
#######################################
# Run one game locally through the Godot flatpak.
# Globals:   reads PARALLEL, TURN_LIMIT, STAMP, GAME_DIR, RENDER_MODE,
#            SAFETY_TIMEOUT and the AI_* / MAP_SIZE / NUM_PLAYERS /
#            RAYON_NUM_THREADS environment settings.
# Arguments: $1 - seed
#            $2 - existing per-game output directory (game.log and, in
#                 weston mode, weston.log are written here)
# Returns:   0 even when Godot exits non-zero or times out (the exit code is
#            logged to stderr; the caller judges success from output files).
#            Exits 1 if flatpak, or weston in weston mode, is not installed.
#######################################
_run_local() {
  local seed="$1"
  local game_dir="$2"

  if ! command -v flatpak >/dev/null 2>&1; then
    echo "ERROR: flatpak not installed. Set AUTOPLAY_HOST to run on a remote Linux host." >&2
    exit 1
  fi

  # Skip the unscoped pkill in parallel mode — it would kill sibling workers.
  # Parallel local runs assume no stray Godot is already running.
  if [ "$PARALLEL" -le 1 ]; then
    _kill_stale_procs
  fi

  # Per-seed clan rotation for AI_PIN_PERSONALITY_P{0..4}. Without it slot 0
  # always holds the same clan, and (per the original note) auto_play.gd gives
  # slot 0 extra strategic help, so that clan would win every game. When all
  # five pins are set and AI_PIN_ROTATION is not "off", the pins are rotated
  # by (seed - 1) % 5 so the shift is deterministic per seed.
  local -a pins=(
    "${AI_PIN_PERSONALITY_P0:-}"
    "${AI_PIN_PERSONALITY_P1:-}"
    "${AI_PIN_PERSONALITY_P2:-}"
    "${AI_PIN_PERSONALITY_P3:-}"
    "${AI_PIN_PERSONALITY_P4:-}"
  )
  local -a seed_pins=("${pins[@]}")
  local rotate="on" pin
  if [ "${AI_PIN_ROTATION:-on}" = "off" ]; then
    rotate="off"
  fi
  for pin in "${pins[@]}"; do
    if [ -z "$pin" ]; then
      rotate="off"
    fi
  done
  if [ "$rotate" = "on" ]; then
    local offset=$(( (seed - 1) % 5 ))
    local slot
    for slot in 0 1 2 3 4; do
      seed_pins[slot]="${pins[$(( (slot + offset) % 5 ))]}"
    done
  fi

  # RAYON_NUM_THREADS caps rayon's thread pool per Godot instance. Per the
  # original note, uncapped instances each claim every core and thrash when
  # PARALLEL > 1; the caller is expected to supply nproc/PARALLEL.
  local -a flatpak_envs=(
    "--env=AUTO_PLAY=true"
    "--env=AUTO_PLAY_SEED=$seed"
    "--env=AUTO_PLAY_TURN_LIMIT=$TURN_LIMIT"
    "--env=AUTO_PLAY_DIR=$game_dir"
    "--env=AP_RUN_ID=${STAMP}_seed$(printf '%03d' "$seed")"
    "--env=AI_DIFFICULTY=${AI_DIFFICULTY:-}"
    "--env=AI_DIFFICULTY_P0=${AI_DIFFICULTY_P0:-}"
    "--env=AI_DIFFICULTY_P1=${AI_DIFFICULTY_P1:-}"
    "--env=AI_PIN_PERSONALITY=${AI_PIN_PERSONALITY:-}"
    "--env=AI_PIN_PERSONALITY_P0=${seed_pins[0]}"
    "--env=AI_PIN_PERSONALITY_P1=${seed_pins[1]}"
    "--env=AI_PIN_PERSONALITY_P2=${seed_pins[2]}"
    "--env=AI_PIN_PERSONALITY_P3=${seed_pins[3]}"
    "--env=AI_PIN_PERSONALITY_P4=${seed_pins[4]}"
    "--env=MAP_SIZE=${MAP_SIZE:-}"
    "--env=NUM_PLAYERS=${NUM_PLAYERS:-}"
    "--env=AI_USE_MCTS=${AI_USE_MCTS:-}"
    "--env=AI_GPU_ROLLOUT=${AI_GPU_ROLLOUT:-}"
    "--env=AI_MCTS_PRIORS=${AI_MCTS_PRIORS:-}"
    "--env=RAYON_NUM_THREADS=${RAYON_NUM_THREADS:-}"
  )
  local -a godot_args=("--path" "$GAME_DIR" "--rendering-method" "gl_compatibility")

  local weston_pid=""
  if [ "$RENDER_MODE" = "weston" ]; then
    if ! command -v weston >/dev/null 2>&1; then
      echo "ERROR: --weston mode but weston not installed" >&2
      exit 1
    fi
    # $$ is the PID of the top-level script even inside background workers,
    # so on its own it would give every parallel worker the same socket.
    # Adding the seed makes the socket unique per game within a batch.
    local weston_socket="godot-headless-$$-seed${seed}"
    echo "[seed $seed] Starting weston (headless)..."
    weston --backend=headless --no-config --socket="$weston_socket" --width=1920 --height=1080 \
      >"$game_dir/weston.log" 2>&1 &
    weston_pid=$!
    # Give the compositor a moment to create its socket before Godot connects.
    sleep 1
    flatpak_envs+=(
      "--socket=wayland"
      "--unset-env=DISPLAY"
      "--env=WAYLAND_DISPLAY=$weston_socket"
      "--filesystem=xdg-run/${weston_socket}"
    )
    godot_args+=("--display-driver" "wayland" "--rendering-driver" "opengl3")
  else
    godot_args+=("--headless")
  fi

  echo "[seed $seed] Launching Godot ($RENDER_MODE, timeout ${SAFETY_TIMEOUT}s)..."
  # A non-zero exit (including a timeout kill) is reported but not fatal.
  XDG_RUNTIME_DIR="${XDG_RUNTIME_DIR:-/run/user/$(id -u)}" \
    timeout "$SAFETY_TIMEOUT" flatpak run --user \
    --filesystem=home \
    "${flatpak_envs[@]}" \
    org.godotengine.Godot "${godot_args[@]}" \
    >"$game_dir/game.log" 2>&1 || {
    local exit_code=$?
    echo "[seed $seed] Godot exited with code $exit_code" >&2
  }

  # Tear down this game's compositor, if one was started.
  if [ -n "$weston_pid" ]; then
    kill "$weston_pid" 2>/dev/null || true
    wait "$weston_pid" 2>/dev/null || true
  fi
}
#######################################
# Run one game on AUTOPLAY_HOST over SSH and copy its output directory back.
# Globals:   reads AUTOPLAY_HOST, REMOTE_HOME, REMOTE_BATCH_ROOT, RESULTS_DIR,
#            STAMP, TURN_LIMIT, RENDER_MODE and the forwarded AI_* / MAP_SIZE /
#            NUM_PLAYERS environment settings.
# Arguments: $1 - seed
#            $2 - local per-game output directory (scp target)
# Returns:   0; an ssh or scp failure is reported on stderr and the caller
#            judges success from the files that arrived.
#
# NOTE(review): unlike _run_local, this path does not forward
# AI_PIN_PERSONALITY_P0..P4 (nor rotate them per seed), AI_USE_MCTS,
# AI_GPU_ROLLOUT or RAYON_NUM_THREADS, and SAFETY_TIMEOUT is not applied to
# the ssh call — confirm whether run_ap3.sh covers these on the remote side.
#######################################
_run_remote() {
  local seed="$1"
  local game_dir="$2"
  echo "[seed $seed] Running via SSH on $AUTOPLAY_HOST..."

  # REMOTE_BATCH_ROOT keeps batch artifacts outside the remote host's dev
  # working tree. The remote dir also embeds RESULTS_DIR's basename so that
  # batches sharing a STAMP but writing to different results dirs do not
  # collide on the remote side.
  local results_basename
  results_basename="$(basename "$RESULTS_DIR")"
  local remote_game_dir="$REMOTE_BATCH_ROOT/.local/batches/${results_basename}/game_${STAMP}_seed${seed}"
  local remote_runner="$REMOTE_HOME/bin/run_ap3.sh"

  # Build the run id up front so it can be embedded single-quoted like every
  # other value; nesting double quotes inside the command string below would
  # end that string early and leave the value unquoted on the remote side.
  local run_id
  printf -v run_id '%s_seed%03d' "$STAMP" "$seed"

  # The whole remote script is one locally-expanded string: $vars are filled
  # in here, \$HOME stays literal for the remote message, and each
  # backslash-newline joins the env assignments onto the runner's command line.
  ssh "$AUTOPLAY_HOST" "
    set -euo pipefail
    mkdir -p '$remote_game_dir'
    if [ ! -f '$remote_runner' ]; then
      echo 'ERROR: $remote_runner not found on $AUTOPLAY_HOST (expected persistent runner in \$HOME/bin)' >&2
      exit 1
    fi
    AUTO_PLAY=true \
    AUTO_PLAY_SEED='$seed' \
    AUTO_PLAY_TURN_LIMIT='$TURN_LIMIT' \
    AUTO_PLAY_DIR='$remote_game_dir' \
    AP_RUN_ID='$run_id' \
    AI_DIFFICULTY='${AI_DIFFICULTY:-}' \
    AI_DIFFICULTY_P0='${AI_DIFFICULTY_P0:-}' \
    AI_DIFFICULTY_P1='${AI_DIFFICULTY_P1:-}' \
    AI_PIN_PERSONALITY='${AI_PIN_PERSONALITY:-}' \
    MAP_SIZE='${MAP_SIZE:-}' \
    NUM_PLAYERS='${NUM_PLAYERS:-}' \
    AI_MCTS_PRIORS='${AI_MCTS_PRIORS:-}' \
    RENDER_MODE='$RENDER_MODE' \
    bash '$remote_runner' >'$remote_game_dir/game.log' 2>&1
  " || {
    echo "[seed $seed] SSH run exited with error — see $game_dir/game.log after scp" >&2
  }

  # Fetch whatever the remote run produced, even after a failed run, so the
  # remote game.log is available locally.
  echo "[seed $seed] Fetching results from $AUTOPLAY_HOST..."
  scp -r "$AUTOPLAY_HOST:$remote_game_dir/." "$game_dir/" \
    >/dev/null 2>&1 || {
    echo "WARNING: scp failed for seed $seed — result may be missing" >&2
  }
}
# ── Main loop ────────────────────────────────────────────────────────────────
#
# _run_one dispatches one seed (remote or local) and writes a status line to
# $STATUS_DIR/seed_<N>.status. Parallel mode runs up to $PARALLEL workers
# concurrently using bash job control; the status files are read after
# `wait` to tally failures (avoids races on a shared FAILED_SEEDS array).
# Scratch directory holding one seed_<N>.status file per game; it is removed
# again when the script exits, whatever the exit path.
STATUS_DIR="$(mktemp -d -t autoplay-batch-status.XXXXXX)"
_remove_status_dir() {
  rm -rf "$STATUS_DIR"
}
trap _remove_status_dir EXIT
#######################################
# Run a single seed (remotely when AUTOPLAY_HOST is set, otherwise locally)
# and record the outcome as "OK <seed>" or "FAIL <seed>" in
# $STATUS_DIR/seed_<seed>.status.
# A game counts as OK only when both meta.json and turn_stats.jsonl exist and
# are non-empty; a 0-byte file is a known symptom of a run whose output
# directory was not writable from inside the sandbox.
# Globals:   reads RESULTS_DIR, STAMP, AUTOPLAY_HOST, STATUS_DIR
# Arguments: $1 - seed
# Returns:   0 (the outcome is reported through the status file)
#######################################
_run_one() {
  local seed="$1"
  local game_dir="$RESULTS_DIR/game_${STAMP}_seed${seed}"
  mkdir -p "$game_dir"
  echo "[$(date +%H:%M:%S)] [seed $seed] start → $game_dir"

  if [ -n "$AUTOPLAY_HOST" ]; then
    _run_remote "$seed" "$game_dir"
  else
    _run_local "$seed" "$game_dir"
  fi

  # Check every required artifact so all problems are reported, not just the
  # first one.
  local incomplete=0 artifact
  for artifact in meta.json turn_stats.jsonl; do
    if [ ! -f "$game_dir/$artifact" ] || [ ! -s "$game_dir/$artifact" ]; then
      echo "[seed $seed] MISSING or empty $artifact" >&2
      incomplete=1
    fi
  done

  if [ "$incomplete" -eq 0 ]; then
    local line_count
    line_count="$(wc -l < "$game_dir/turn_stats.jsonl" | tr -d ' ')"
    echo "[$(date +%H:%M:%S)] [seed $seed] OK — $line_count turn_stats line(s)"
    echo "OK $seed" > "$STATUS_DIR/seed_${seed}.status"
  else
    echo "FAIL $seed" > "$STATUS_DIR/seed_${seed}.status"
  fi
}
# Dispatch every seed in [SEED_START, SEED_END]: a bounded pool of background
# workers when PARALLEL > 1, otherwise one game after another.
if [ "$PARALLEL" -gt 1 ]; then
  echo "[$(date +%H:%M:%S)] Dispatching $COUNT seed(s) with up to $PARALLEL concurrent..."
  for (( seed = SEED_START; seed <= SEED_END; seed++ )); do
    # Hold back while the pool is full; `wait -n` returns as soon as any one
    # worker finishes. If it reports a failure, stop waiting and launch.
    while [ "$(jobs -rp | wc -l | tr -d ' ')" -ge "$PARALLEL" ]; do
      wait -n 2>/dev/null || break
    done
    _run_one "$seed" &
    # Optional stagger between launches. A LAUNCH_COOLDOWN that is not a
    # positive integer is silently treated as "no cooldown".
    if [ "${LAUNCH_COOLDOWN}" -gt 0 ] 2>/dev/null; then
      sleep "$LAUNCH_COOLDOWN" || true
    fi
  done
  # Barrier: let every remaining worker finish before tallying.
  wait
else
  for (( seed = SEED_START; seed <= SEED_END; seed++ )); do
    _run_one "$seed"
  done
fi

# Tally outcomes from the per-seed status files. A seed without a status file
# is counted as failed.
FAILED_SEEDS=()
for (( seed = SEED_START; seed <= SEED_END; seed++ )); do
  status_file="$STATUS_DIR/seed_${seed}.status"
  if [ -f "$status_file" ]; then
    read -r status _ < "$status_file"
    if [ "$status" != "OK" ]; then
      FAILED_SEEDS+=("$seed")
    fi
  else
    echo "[seed $seed] MISSING status file (worker crashed before writing)" >&2
    FAILED_SEEDS+=("$seed")
  fi
done
# ── Summary ──────────────────────────────────────────────────────────────────
# Report how many games produced output, then fail the batch (exit 1) if any
# seed is listed in FAILED_SEEDS.
PRODUCED=$(( COUNT - ${#FAILED_SEEDS[@]} ))
summary_rule="============================================================"
printf '\n%s\n' "$summary_rule"
printf '%s\n' "Batch complete: $PRODUCED/$COUNT games produced turn_stats.jsonl"
printf '%s\n' "Results: $RESULTS_DIR"
printf '%s\n' "$summary_rule"
if [ "${#FAILED_SEEDS[@]}" -ne 0 ]; then
  {
    echo "ERROR: No turn_stats.jsonl for seeds: ${FAILED_SEEDS[*]}"
    echo " Check game.log in each game dir for details."
  } >&2
  exit 1
fi
# ── E2E determinism gate ──────────────────────────────────────────────────────
# Hands the results dir and game count to e2e-determinism-check.sh and fails
# the batch (exit 1) when that script reports failure. Per the original note,
# the check catches script errors that do not stop a game and rejects seeds
# with non-allowlisted ERRORs. If the checker is missing or not executable
# the gate is skipped with a warning and the batch still succeeds.
E2E_CHECK="$SCRIPT_DIR/e2e-determinism-check.sh"
if [ ! -x "$E2E_CHECK" ]; then
  echo "WARNING: $E2E_CHECK not found or not executable — skipping E2E gate" >&2
  exit 0
fi

echo ""
echo "Running E2E determinism gate..."
"$E2E_CHECK" "$RESULTS_DIR" "$COUNT" || {
  echo "ERROR: E2E gate failed — see above for details." >&2
  exit 1
}
exit 0