magicciv/tools/autoplay-batch.sh
Natalie afcbc0c93d fix(@projects/@magic-civilization): 🐛 resolve end-to-end determinism in processor.rs
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-04-17 06:09:17 -07:00

322 lines
12 KiB
Bash
Executable file
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
# autoplay-batch.sh — Run auto_play N times with different seeds and collect per-game output dirs.
#
# Usage: tools/autoplay-batch.sh [--weston|--headless] [count=3] [turn_limit=500] [results_dir]
#
# Output layout:
# <results_dir>/game_<stamp>_seed<N>/
# meta.json
# turn_stats.jsonl
# events.jsonl
# game.log
# weston.log (weston mode only)
# *.save (per-turn saves, if configured)
#
# Environment:
# AUTOPLAY_HOST — If set (e.g. "lilith@apricot.local"), run each game via SSH
# using run_ap3.sh on the remote host and scp results back.
# If unset, run locally via flatpak (Linux only).
# RENDER_MODE — "headless" (default) or "weston". The --weston / --headless flags override this.
# headless: Godot --headless, no display, no screenshots.
# weston: weston headless backend, software rendering, screenshots work.
# PARALLEL — Max seeds to run concurrently (default 1 = serial).
# Remote runner is concurrency-safe via scoped pkill per AUTO_PLAY_DIR.
# Apricot has 64 cores → PARALLEL=10 is a safe, ~10× wall-clock speedup.
# SEED_OFFSET — Shift the seed range from [1..COUNT] to [1+OFFSET..COUNT+OFFSET]
# (default 0). Use for multi-sweep runs that share a parent
# results_dir and need disjoint seed numbers (e.g. Task #10
# AI_PIN_PERSONALITY rotation: 5× batches at offsets 0,10,20,30,40).
# Strict mode: stop on command failure, on unset variables, and on a failure
# in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Paths are derived from this script's own location: the script directory,
# its parent (used as both project dir and repo root), and the game sources.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
GAME_DIR="$PROJECT_DIR/src/game"
REPO_ROOT="$PROJECT_DIR"

# RENDER_MODE may come from the environment; it falls back to "headless".
: "${RENDER_MODE:=headless}"

# Pull the render-mode flags out of the argument list (they may appear in any
# position); every other argument is kept, in order, as a positional.
POSITIONAL=()
for arg; do
  if [[ "$arg" == "--weston" ]]; then
    RENDER_MODE="weston"
  elif [[ "$arg" == "--headless" ]]; then
    RENDER_MODE="headless"
  else
    POSITIONAL+=("$arg")
  fi
done

# Re-seat "$@" with the remaining positionals. The ${arr[@]+...} form expands
# to nothing for an empty array instead of referencing its elements.
set -- "${POSITIONAL[@]+"${POSITIONAL[@]}"}"
# _canonical_uint VALUE
# Print VALUE as a canonical base-10 integer (leading zeros removed).
# Returns 1 and prints nothing when VALUE is not a non-empty run of digits.
# Needed because shell arithmetic reads a leading "0" as octal: "08" is an
# error there and "010" means 8.
_canonical_uint() {
  [[ "$1" =~ ^[0-9]+$ ]] || return 1
  printf '%s\n' "$(( 10#$1 ))"
}

# _is_tmp_path PATH
# Succeeds when PATH is /tmp or /private/tmp itself, or anything below them.
_is_tmp_path() {
  case "$1" in
    /tmp | /tmp/* | /private/tmp | /private/tmp/*) return 0 ;;
    *) return 1 ;;
  esac
}

# Positional arguments (flags were already removed): count, turn limit and
# results directory, each with a default.
COUNT="${1:-3}"
TURN_LIMIT="${2:-500}"
RESULTS_DIR="${3:-$REPO_ROOT/.local/batches/autoplay_batch}"

if ! [[ "$COUNT" =~ ^[0-9]+$ ]] || [ "$COUNT" -lt 1 ]; then
  echo "ERROR: count must be a positive integer (got '$COUNT')" >&2
  exit 2
fi
COUNT="$(_canonical_uint "$COUNT")"

if ! [[ "$TURN_LIMIT" =~ ^[0-9]+$ ]] || [ "$TURN_LIMIT" -lt 1 ]; then
  echo "ERROR: turn_limit must be a positive integer (got '$TURN_LIMIT')" >&2
  exit 2
fi
TURN_LIMIT="$(_canonical_uint "$TURN_LIMIT")"

AUTOPLAY_HOST="${AUTOPLAY_HOST:-}"

# Per-game wall-clock cap in seconds, derived from the turn limit.
SAFETY_TIMEOUT=$(( TURN_LIMIT * 2 + 300 ))

PARALLEL="${PARALLEL:-1}"

# SEED_OFFSET shifts the seed range from [1..COUNT] to [1+OFFSET..COUNT+OFFSET].
# Used by multi-sweep aggregation (e.g. Task #10 B5 AI_PIN_PERSONALITY rotation)
# to produce disjoint seed numbers across sweeps that share a parent results dir.
SEED_OFFSET="${SEED_OFFSET:-0}"

if ! [[ "$PARALLEL" =~ ^[0-9]+$ ]] || [ "$PARALLEL" -lt 1 ]; then
  echo "ERROR: PARALLEL must be a positive integer (got '$PARALLEL')" >&2
  exit 2
fi
PARALLEL="$(_canonical_uint "$PARALLEL")"

if ! [[ "$SEED_OFFSET" =~ ^[0-9]+$ ]]; then
  echo "ERROR: SEED_OFFSET must be a non-negative integer (got '$SEED_OFFSET')" >&2
  exit 2
fi
SEED_OFFSET="$(_canonical_uint "$SEED_OFFSET")"

# Flatpak sandbox can't write to /tmp. Reject /tmp paths outright instead of
# silently redirecting — persistent output belongs under the repo. The bare
# directories /tmp and /private/tmp are rejected too, not just paths below them.
if _is_tmp_path "$RESULTS_DIR"; then
  echo "ERROR: results_dir under /tmp is forbidden (wiped on reboot, flatpak sandbox hostile)." >&2
  echo " Use a path under the repo (default: <repo>/.local/batches/) or \$HOME/tmp." >&2
  exit 2
fi
# Make sure the output directory exists, then fix the values shared by the
# whole batch: one timestamp and the inclusive seed range.
mkdir -p "$RESULTS_DIR"
STAMP="$(date +%Y%m%d_%H%M%S)"
SEED_START=$(( SEED_OFFSET + 1 ))
SEED_END=$(( SEED_OFFSET + COUNT ))

# Start-of-batch banner. The mode line depends on whether a remote host is set.
if [ -n "$AUTOPLAY_HOST" ]; then
  MODE_LINE="Mode: remote SSH ($AUTOPLAY_HOST)"
else
  MODE_LINE="Mode: local flatpak"
fi
printf '%s\n' \
  "============================================================" \
  "Autoplay Batch: $COUNT games (seeds $SEED_START..$SEED_END), turn_limit=$TURN_LIMIT" \
  "Results: $RESULTS_DIR" \
  "Stamp: $STAMP" \
  "$MODE_LINE" \
  "Render: $RENDER_MODE" \
  "Parallel: $PARALLEL concurrent seed(s)" \
  "Safety timeout: ${SAFETY_TIMEOUT}s per game" \
  "============================================================"

# Resolve REMOTE_HOME once upfront (parallel workers all need it and racing for it breaks).
# It is exported so it is visible to the worker subshells.
if [ -n "$AUTOPLAY_HOST" ]; then
  REMOTE_HOME="$(ssh "$AUTOPLAY_HOST" 'echo "$HOME"')"
  export REMOTE_HOME
fi
# _kill_stale_procs
# Best-effort cleanup before a run: signal (via pkill -f, i.e. matching the
# full command line) any process matching the weston or Godot flatpak
# patterns, ignore "nothing matched" failures, then pause for half a second.
_kill_stale_procs() {
  local pattern
  for pattern in "weston.*godot-headless" "org.godotengine.Godot"; do
    pkill -f "$pattern" 2>/dev/null || true
  done
  sleep 0.5
}
# _run_local SEED GAME_DIR
# Run one auto-play game on this machine through the Godot flatpak.
#
# Arguments:
#   $1 - seed, handed to the game as AUTO_PLAY_SEED
#   $2 - existing directory, handed to the game as AUTO_PLAY_DIR; game.log
#        (and weston.log in weston mode) are written into it
# Globals read: PARALLEL, TURN_LIMIT, GAME_DIR, RENDER_MODE, SAFETY_TIMEOUT,
#   AI_DIFFICULTY, AI_PIN_PERSONALITY, XDG_RUNTIME_DIR
# Exits the current (sub)shell with status 1 when flatpak is missing, or when
# weston mode is requested and weston is missing. A non-zero exit from the
# game itself is only reported on stderr.
_run_local() {
  local seed="$1"
  local game_dir="$2"

  if ! command -v flatpak >/dev/null 2>&1; then
    echo "ERROR: flatpak not installed. Set AUTOPLAY_HOST to run on a remote Linux host." >&2
    exit 1
  fi

  # Skip unscoped pkill in parallel mode — would murder sibling workers.
  # Parallel local runs assume no stray Godot is already running.
  if [ "$PARALLEL" -le 1 ]; then
    _kill_stale_procs
  fi

  local weston_pid=""
  local weston_socket=""
  local -a flatpak_envs=(
    "--env=AUTO_PLAY=true"
    "--env=AUTO_PLAY_SEED=$seed"
    "--env=AUTO_PLAY_TURN_LIMIT=$TURN_LIMIT"
    "--env=AUTO_PLAY_DIR=$game_dir"
    "--env=AI_DIFFICULTY=${AI_DIFFICULTY:-}"
    "--env=AI_PIN_PERSONALITY=${AI_PIN_PERSONALITY:-}"
  )
  local -a godot_args=("--path" "$GAME_DIR" "--rendering-method" "gl_compatibility")

  if [ "$RENDER_MODE" = "weston" ]; then
    if ! command -v weston >/dev/null 2>&1; then
      echo "ERROR: --weston mode but weston not installed" >&2
      exit 1
    fi
    # The socket name must be unique per worker. "$$" alone is not: inside a
    # backgrounded worker it still expands to the parent script's PID, so all
    # parallel workers would ask for the same socket. Adding the seed keeps
    # the names apart while preserving the "godot-headless" prefix that
    # _kill_stale_procs matches on.
    weston_socket="godot-headless-$$-seed${seed}"
    echo "[seed $seed] Starting weston (headless)..."
    weston --backend=headless --socket="$weston_socket" --width=1920 --height=1080 \
      >"$game_dir/weston.log" 2>&1 &
    weston_pid=$!
    # Give the compositor a moment to create its socket before Godot connects.
    sleep 1
    flatpak_envs+=(
      "--socket=wayland"
      "--env=WAYLAND_DISPLAY=$weston_socket"
      "--filesystem=xdg-run/${weston_socket}"
    )
  else
    godot_args+=("--headless")
  fi

  echo "[seed $seed] Launching Godot ($RENDER_MODE, timeout ${SAFETY_TIMEOUT}s)..."
  # The "|| { ... }" keeps a failing or timed-out game from aborting the
  # batch under errexit; the caller judges success from the output files.
  XDG_RUNTIME_DIR="${XDG_RUNTIME_DIR:-/run/user/$(id -u)}" \
    timeout "$SAFETY_TIMEOUT" flatpak run --user \
    --filesystem=home \
    "${flatpak_envs[@]}" \
    org.godotengine.Godot "${godot_args[@]}" \
    >"$game_dir/game.log" 2>&1 || {
    local exit_code=$?
    echo "[seed $seed] Godot exited with code $exit_code" >&2
  }

  # Tear down this worker's compositor, if one was started.
  if [ -n "$weston_pid" ]; then
    kill "$weston_pid" 2>/dev/null || true
    wait "$weston_pid" 2>/dev/null || true
  fi
}
# _run_remote SEED GAME_DIR
# Run one auto-play game on $AUTOPLAY_HOST over SSH, then copy its output
# directory back into GAME_DIR with scp.
#
# Arguments:
#   $1 - seed, handed to the remote runner as AUTO_PLAY_SEED
#   $2 - local directory that receives the fetched results
# Globals read: AUTOPLAY_HOST, REMOTE_HOME (resolved and exported before any
#   worker starts), RESULTS_DIR, STAMP, TURN_LIMIT, RENDER_MODE,
#   AI_DIFFICULTY, AI_PIN_PERSONALITY
# SSH and scp failures are reported on stderr but do not fail the function.
_run_remote() {
  local seed="$1"
  local game_dir="$2"
  local batch_name remote_dir runner remote_cmd

  echo "[seed $seed] Running via SSH on $AUTOPLAY_HOST..."

  # The remote directory is keyed by the basename of RESULTS_DIR so batches
  # that share a STAMP but write to different results dirs do not collide.
  batch_name="$(basename "$RESULTS_DIR")"
  remote_dir="$REMOTE_HOME/Code/@projects/@magic-civilization/.local/batches/${batch_name}/game_${STAMP}_seed${seed}"
  runner="$REMOTE_HOME/bin/run_ap3.sh"

  # Script executed by the remote shell. All values are expanded locally and
  # embedded in single quotes; only \$HOME (in the error text) is escaped.
  # The lines are kept flush-left so the text sent over SSH is unchanged.
  remote_cmd="
set -euo pipefail
mkdir -p '$remote_dir'
if [ ! -f '$runner' ]; then
echo 'ERROR: $runner not found on $AUTOPLAY_HOST (expected persistent runner in \$HOME/bin)' >&2
exit 1
fi
AUTO_PLAY=true \
AUTO_PLAY_SEED='$seed' \
AUTO_PLAY_TURN_LIMIT='$TURN_LIMIT' \
AUTO_PLAY_DIR='$remote_dir' \
AI_DIFFICULTY='${AI_DIFFICULTY:-}' \
AI_PIN_PERSONALITY='${AI_PIN_PERSONALITY:-}' \
RENDER_MODE='$RENDER_MODE' \
bash '$runner' >'$remote_dir/game.log' 2>&1
"

  if ! ssh "$AUTOPLAY_HOST" "$remote_cmd"; then
    echo "[seed $seed] SSH run exited with error — see $game_dir/game.log after scp" >&2
  fi

  echo "[seed $seed] Fetching results from $AUTOPLAY_HOST..."
  # Results are fetched even after a failed run so that game.log comes back.
  if ! scp -r "$AUTOPLAY_HOST:$remote_dir/." "$game_dir/" >/dev/null 2>&1; then
    echo "WARNING: scp failed for seed $seed — result may be missing" >&2
  fi
}
# ── Main loop ────────────────────────────────────────────────────────────────
#
# _run_one dispatches one seed (remote or local) and writes a status line to
# $STATUS_DIR/seed_<N>.status. Parallel mode runs up to $PARALLEL workers
# concurrently using bash job control; the status files are read after
# `wait` to tally failures (avoids races on a shared FAILED_SEEDS array).
# Scratch directory that receives one status file per seed. It is removed
# again by an EXIT trap when the script terminates.
STATUS_DIR="$(mktemp -d -t autoplay-batch-status.XXXXXX)"
_cleanup_status_dir() {
  rm -rf "$STATUS_DIR"
}
trap _cleanup_status_dir EXIT
# _run_one SEED
# Play a single seed and record the outcome.
#
# Creates $RESULTS_DIR/game_<STAMP>_seed<SEED>, dispatches to _run_remote when
# AUTOPLAY_HOST is non-empty and to _run_local otherwise, then inspects the
# game directory: the run counts as OK only if meta.json exists and
# turn_stats.jsonl exists and is non-empty. The verdict ("OK <seed>" or
# "FAIL <seed>") is written to $STATUS_DIR/seed_<SEED>.status.
_run_one() {
  local seed="$1"
  local game_dir="$RESULTS_DIR/game_${STAMP}_seed${seed}"
  local status_file="$STATUS_DIR/seed_${seed}.status"
  local have_meta=0 have_stats=0 line_count

  mkdir -p "$game_dir"
  echo "[$(date +%H:%M:%S)] [seed $seed] start → $game_dir"

  if [ -z "$AUTOPLAY_HOST" ]; then
    _run_local "$seed" "$game_dir"
  else
    _run_remote "$seed" "$game_dir"
  fi

  if [ -f "$game_dir/meta.json" ]; then
    have_meta=1
  fi
  if [ -f "$game_dir/turn_stats.jsonl" ] && [ -s "$game_dir/turn_stats.jsonl" ]; then
    have_stats=1
  fi

  if (( have_meta && have_stats )); then
    line_count="$(wc -l < "$game_dir/turn_stats.jsonl" | tr -d ' ')"
    echo "[$(date +%H:%M:%S)] [seed $seed] OK — $line_count turn_stats line(s)"
    echo "OK $seed" > "$status_file"
    return
  fi

  # Failure path: name each missing artefact, then record the failure.
  (( have_meta )) || echo "[seed $seed] MISSING meta.json" >&2
  (( have_stats )) || echo "[seed $seed] MISSING or empty turn_stats.jsonl" >&2
  echo "FAIL $seed" > "$status_file"
}
# Dispatch every seed in [SEED_START..SEED_END]: one after another when
# PARALLEL is 1, otherwise as background jobs with at most PARALLEL running
# at the same time.
if [ "$PARALLEL" -le 1 ]; then
  for (( seed = SEED_START; seed <= SEED_END; seed++ )); do
    _run_one "$seed"
  done
else
  echo "[$(date +%H:%M:%S)] Dispatching $COUNT seed(s) with up to $PARALLEL concurrent..."

  # "wait -n" (block until any one job ends) only exists in bash >= 4.3. On
  # older shells it fails immediately, which would drop out of the throttle
  # loop and start every seed at once, so those shells poll instead.
  if (( BASH_VERSINFO[0] > 4 || ( BASH_VERSINFO[0] == 4 && BASH_VERSINFO[1] >= 3 ) )); then
    WAIT_N_OK=1
  else
    WAIT_N_OK=0
  fi

  for (( seed = SEED_START; seed <= SEED_END; seed++ )); do
    # Hold back while the number of running jobs is at the limit.
    while [ "$(jobs -rp | wc -l | tr -d ' ')" -ge "$PARALLEL" ]; do
      if (( WAIT_N_OK )); then
        # A non-zero status here means the reaped worker failed (or there is
        # nothing left to wait for); either way a slot is free.
        wait -n 2>/dev/null || break
      else
        sleep 0.2
      fi
    done
    _run_one "$seed" &
  done
  # Barrier: let all remaining workers finish before tallying.
  wait
fi

# Tally results from the per-seed status files. A missing file means the
# worker died before it could report; anything other than "OK" is a failure.
FAILED_SEEDS=()
for (( seed = SEED_START; seed <= SEED_END; seed++ )); do
  status_file="$STATUS_DIR/seed_${seed}.status"
  if [ ! -f "$status_file" ]; then
    echo "[seed $seed] MISSING status file (worker crashed before writing)" >&2
    FAILED_SEEDS+=("$seed")
    continue
  fi
  # read returns non-zero on an empty or unterminated file; that must count
  # as a failed seed rather than abort the whole script under errexit.
  status=""
  read -r status _ < "$status_file" || true
  [ "$status" = "OK" ] || FAILED_SEEDS+=("$seed")
done
# ── Summary ──────────────────────────────────────────────────────────────────
# Report how many games succeeded; exit 1 (after listing the seeds on stderr)
# if any seed failed.
FAILED_COUNT=${#FAILED_SEEDS[@]}
PRODUCED=$(( COUNT - FAILED_COUNT ))
printf '%s\n' \
  "" \
  "============================================================" \
  "Batch complete: $PRODUCED/$COUNT games produced turn_stats.jsonl" \
  "Results: $RESULTS_DIR" \
  "============================================================"
if (( FAILED_COUNT > 0 )); then
  {
    echo "ERROR: No turn_stats.jsonl for seeds: ${FAILED_SEEDS[*]}"
    echo " Check game.log in each game dir for details."
  } >&2
  exit 1
fi
# ── E2E determinism gate ──────────────────────────────────────────────────────
# Runs e2e-determinism-check.sh on the results dir. Catches script errors that
# don't stop the game (map_placer out-of-bounds, nil-access warnings that
# accumulate silently) and fails the batch if any seed has non-allowlisted ERRORs.
# The checker is looked up next to this script and is invoked with the results
# dir and the game count; if it is absent or not executable the gate is
# skipped with a warning and the batch still succeeds.
E2E_CHECK="$SCRIPT_DIR/e2e-determinism-check.sh"
if [ ! -x "$E2E_CHECK" ]; then
  echo "WARNING: $E2E_CHECK not found or not executable — skipping E2E gate" >&2
  exit 0
fi

echo ""
echo "Running E2E determinism gate..."
"$E2E_CHECK" "$RESULTS_DIR" "$COUNT" || {
  echo "ERROR: E2E gate failed — see above for details." >&2
  exit 1
}
exit 0