feat(@projects): add parallel execution support

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
Natalie 2026-04-16 16:24:49 -07:00
parent 626b20b673
commit 1a31596a94

View file

@ -19,6 +19,9 @@
# RENDER_MODE — "headless" (default) or "weston". --weston flag sets this.
# headless: Godot --headless, no display, no screenshots.
# weston: weston headless backend, software rendering, screenshots work.
# PARALLEL — Max seeds to run concurrently (default 1 = serial).
# Remote runner is concurrency-safe via scoped pkill per AUTO_PLAY_DIR.
# Apricot has 64 cores → PARALLEL=10 is a safe, ~10× wall-clock speedup.
set -euo pipefail
@ -55,6 +58,12 @@ fi
AUTOPLAY_HOST="${AUTOPLAY_HOST:-}"
SAFETY_TIMEOUT=$(( TURN_LIMIT * 2 + 300 ))
# Concurrency cap for the batch. Unset or empty means 1 (serial).
PARALLEL="${PARALLEL:-1}"
# Guard clause: proceed only when the value is all decimal digits and is not
# below 1; anything else is reported on stderr and exits with status 2.
{ [[ "$PARALLEL" =~ ^[0-9]+$ ]] && ! [ "$PARALLEL" -lt 1 ]; } || {
  echo "ERROR: PARALLEL must be a positive integer (got '$PARALLEL')" >&2
  exit 2
}
# Flatpak sandbox can't write to /tmp. Reject /tmp paths outright instead of
# silently redirecting — persistent output belongs under the repo.
@ -78,9 +87,16 @@ else
echo "Mode: local flatpak"
fi
echo "Render: $RENDER_MODE"
echo "Parallel: $PARALLEL concurrent seed(s)"
echo "Safety timeout: ${SAFETY_TIMEOUT}s per game"
echo "============================================================"
# Resolve the remote $HOME once, up front: every parallel worker needs it, and
# letting the workers race to resolve it themselves breaks.
#
# _resolve_remote_home
#   Globals:  AUTOPLAY_HOST (read), REMOTE_HOME (written and exported)
#   Returns:  0 on success; 1 if ssh fails or reports an empty $HOME, in which
#             case REMOTE_HOME is left untouched.
_resolve_remote_home() {
  local resolved
  # Single quotes on purpose: $HOME must expand on the remote side, not here.
  if ! resolved="$(ssh "$AUTOPLAY_HOST" 'echo "$HOME"')" || [ -z "$resolved" ]; then
    # An empty value would silently turn every "$REMOTE_HOME/..." path into a
    # path rooted at "/", so refuse to continue instead.
    echo "ERROR: could not resolve \$HOME on '$AUTOPLAY_HOST' via ssh" >&2
    return 1
  fi
  REMOTE_HOME="$resolved"
  export REMOTE_HOME
}
if [ -n "$AUTOPLAY_HOST" ]; then
  _resolve_remote_home || exit 1
fi
_kill_stale_procs() {
pkill -f "weston.*godot-headless" 2>/dev/null || true
pkill -f "org.godotengine.Godot" 2>/dev/null || true
@ -96,7 +112,11 @@ _run_local() {
exit 1
fi
_kill_stale_procs
# Skip unscoped pkill in parallel mode — would murder sibling workers.
# Parallel local runs assume no stray Godot is already running.
if [ "$PARALLEL" -le 1 ]; then
_kill_stale_procs
fi
local WESTON_PID=""
local FLATPAK_ENVS=(
@ -151,10 +171,7 @@ _run_remote() {
echo "[seed $seed] Running via SSH on $AUTOPLAY_HOST..."
# Resolve remote $HOME once so we don't fight quoting rules
if [ -z "${REMOTE_HOME:-}" ]; then
REMOTE_HOME="$(ssh "$AUTOPLAY_HOST" 'echo "$HOME"')"
fi
# REMOTE_HOME is resolved once upfront by the main loop and exported
local remote_game_dir="$REMOTE_HOME/Code/@projects/@magic-civilization/.local/batches/autoplay_batch/game_${STAMP}_seed${seed}"
local remote_runner="$REMOTE_HOME/bin/run_ap3.sh"
@ -184,15 +201,20 @@ _run_remote() {
}
# ── Main loop ────────────────────────────────────────────────────────────────
#
# _run_one dispatches one seed (remote or local) and writes a status line to
# $STATUS_DIR/seed_<N>.status. Parallel mode runs up to $PARALLEL workers
# concurrently using bash job control; the status files are read after
# `wait` to tally failures (avoids races on a shared FAILED_SEEDS array).
FAILED_SEEDS=()
STATUS_DIR="$(mktemp -d -t autoplay-batch-status.XXXXXX)"
trap 'rm -rf "$STATUS_DIR"' EXIT
for seed in $(seq 1 "$COUNT"); do
game_dir="$RESULTS_DIR/game_${STAMP}_seed${seed}"
_run_one() {
local seed="$1"
local game_dir="$RESULTS_DIR/game_${STAMP}_seed${seed}"
mkdir -p "$game_dir"
echo ""
echo "[$(date +%H:%M:%S)] === Game $seed/$COUNT (seed=$seed) ==="
echo "[seed $seed] Output dir: $game_dir"
echo "[$(date +%H:%M:%S)] [seed $seed] start → $game_dir"
if [ -n "$AUTOPLAY_HOST" ]; then
_run_remote "$seed" "$game_dir"
@ -200,24 +222,47 @@ for seed in $(seq 1 "$COUNT"); do
_run_local "$seed" "$game_dir"
fi
# Check for meta.json + non-empty turn_stats.jsonl as canonical success indicators
meta_ok=false
stats_ok=false
local meta_ok=false stats_ok=false
[ -f "$game_dir/meta.json" ] && meta_ok=true
[ -f "$game_dir/turn_stats.jsonl" ] && [ -s "$game_dir/turn_stats.jsonl" ] && stats_ok=true
if $meta_ok && $stats_ok; then
local line_count
line_count="$(wc -l < "$game_dir/turn_stats.jsonl" | tr -d ' ')"
echo "[seed $seed] OK — meta.json present, turn_stats.jsonl has $line_count line(s)"
echo "[$(date +%H:%M:%S)] [seed $seed] OK — $line_count turn_stats line(s)"
echo "OK $seed" > "$STATUS_DIR/seed_${seed}.status"
else
if ! $meta_ok; then
echo "[seed $seed] MISSING meta.json" >&2
fi
if ! $stats_ok; then
echo "[seed $seed] MISSING or empty turn_stats.jsonl (game may have crashed)" >&2
fi
FAILED_SEEDS+=("$seed")
$meta_ok || echo "[seed $seed] MISSING meta.json" >&2
$stats_ok || echo "[seed $seed] MISSING or empty turn_stats.jsonl" >&2
echo "FAIL $seed" > "$STATUS_DIR/seed_${seed}.status"
fi
}
# Block while the number of running background jobs is at or above $PARALLEL.
# If `wait -n` itself returns non-zero, stop waiting and hand control back so
# the caller launches the next worker. Always returns 0.
_wait_for_free_slot() {
  while [ "$(jobs -rp | wc -l | tr -d ' ')" -ge "$PARALLEL" ]; do
    wait -n 2>/dev/null || return 0
  done
  return 0
}

# Dispatch every seed through _run_one: as throttled background jobs when
# PARALLEL is above 1, otherwise one after another in the foreground.
if ! [ "$PARALLEL" -le 1 ]; then
  echo "[$(date +%H:%M:%S)] Dispatching $COUNT seed(s) with up to $PARALLEL concurrent..."
  for seed in $(seq 1 "$COUNT"); do
    _wait_for_free_slot
    _run_one "$seed" &
  done
  # Barrier: do not continue until every outstanding worker has finished.
  wait
else
  for seed in $(seq 1 "$COUNT"); do
    _run_one "$seed"
  done
fi
# Rebuild FAILED_SEEDS from the per-seed status files.
#
# _tally_failed_seeds
#   Globals:  COUNT (read), STATUS_DIR (read), FAILED_SEEDS (reset and written)
#   Outputs:  one diagnostic line on stderr per missing or empty status file
#   Returns:  0
# A seed is counted as failed when its status file is missing, is empty, or
# has a first word other than "OK".
_tally_failed_seeds() {
  local seed status_file status
  FAILED_SEEDS=()
  for seed in $(seq 1 "$COUNT"); do
    status_file="$STATUS_DIR/seed_${seed}.status"
    if [ ! -f "$status_file" ]; then
      echo "[seed $seed] MISSING status file (worker crashed before writing)" >&2
      FAILED_SEEDS+=("$seed")
      continue
    fi
    # `read` returns non-zero on an empty file or a line with no trailing
    # newline. Under `set -e` that would abort the whole batch before the
    # summary, so tolerate it and judge the seed by what was actually read.
    status=""
    read -r status _ < "$status_file" || true
    if [ -z "$status" ]; then
      echo "[seed $seed] EMPTY or unreadable status file" >&2
    fi
    [ "$status" = "OK" ] || FAILED_SEEDS+=("$seed")
  done
  return 0
}
_tally_failed_seeds
# ── Summary ──────────────────────────────────────────────────────────────────