From 1a31596a9497abb9fbf1a71d2bd5a2fbfe837629 Mon Sep 17 00:00:00 2001
From: Natalie
Date: Thu, 16 Apr 2026 16:24:49 -0700
Subject: [PATCH] =?UTF-8?q?feat(@projects):=20=E2=9C=A8=20add=20parallel?=
 =?UTF-8?q?=20execution=20support?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-Authored-By: Lilith Autocommit
---
NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch. Hunk line counts match the headers,
but indent widths and comment alignment are a best guess — apply with
`git apply --ignore-whitespace` and re-check indentation against the target.

 tools/autoplay-batch.sh | 89 +++++++++++++++++++++++++++++++----------
 1 file changed, 67 insertions(+), 22 deletions(-)

diff --git a/tools/autoplay-batch.sh b/tools/autoplay-batch.sh
index 262197ef..728d5a97 100755
--- a/tools/autoplay-batch.sh
+++ b/tools/autoplay-batch.sh
@@ -19,6 +19,9 @@
 # RENDER_MODE — "headless" (default) or "weston". --weston flag sets this.
 #     headless: Godot --headless, no display, no screenshots.
 #     weston: weston headless backend, software rendering, screenshots work.
+# PARALLEL — Max seeds to run concurrently (default 1 = serial).
+#     Remote runner is concurrency-safe via scoped pkill per AUTO_PLAY_DIR.
+#     Apricot has 64 cores → PARALLEL=10 is a safe, ~10× wall-clock speedup.
 
 set -euo pipefail
 
@@ -55,6 +58,12 @@ fi
 
 AUTOPLAY_HOST="${AUTOPLAY_HOST:-}"
 SAFETY_TIMEOUT=$(( TURN_LIMIT * 2 + 300 ))
+PARALLEL="${PARALLEL:-1}"
+
+if ! [[ "$PARALLEL" =~ ^[0-9]+$ ]] || [ "$PARALLEL" -lt 1 ]; then
+    echo "ERROR: PARALLEL must be a positive integer (got '$PARALLEL')" >&2
+    exit 2
+fi
 
 # Flatpak sandbox can't write to /tmp. Reject /tmp paths outright instead of
 # silently redirecting — persistent output belongs under the repo.
@@ -78,9 +87,16 @@ else
     echo "Mode: local flatpak"
 fi
 echo "Render: $RENDER_MODE"
+echo "Parallel: $PARALLEL concurrent seed(s)"
 echo "Safety timeout: ${SAFETY_TIMEOUT}s per game"
 echo "============================================================"
 
+# Resolve REMOTE_HOME once upfront (parallel workers all need it and racing for it breaks)
+if [ -n "$AUTOPLAY_HOST" ]; then
+    REMOTE_HOME="$(ssh "$AUTOPLAY_HOST" 'echo "$HOME"')"
+    export REMOTE_HOME
+fi
+
 _kill_stale_procs() {
     pkill -f "weston.*godot-headless" 2>/dev/null || true
     pkill -f "org.godotengine.Godot" 2>/dev/null || true
@@ -96,7 +112,11 @@ _run_local() {
         exit 1
     fi
 
-    _kill_stale_procs
+    # Skip unscoped pkill in parallel mode — would murder sibling workers.
+    # Parallel local runs assume no stray Godot is already running.
+    if [ "$PARALLEL" -le 1 ]; then
+        _kill_stale_procs
+    fi
 
     local WESTON_PID=""
     local FLATPAK_ENVS=(
@@ -151,10 +171,7 @@ _run_remote() {
 
     echo "[seed $seed] Running via SSH on $AUTOPLAY_HOST..."
 
-    # Resolve remote $HOME once so we don't fight quoting rules
-    if [ -z "${REMOTE_HOME:-}" ]; then
-        REMOTE_HOME="$(ssh "$AUTOPLAY_HOST" 'echo "$HOME"')"
-    fi
+    # REMOTE_HOME is resolved once upfront by the main loop and exported
     local remote_game_dir="$REMOTE_HOME/Code/@projects/@magic-civilization/.local/batches/autoplay_batch/game_${STAMP}_seed${seed}"
     local remote_runner="$REMOTE_HOME/bin/run_ap3.sh"
 
@@ -184,15 +201,20 @@ _run_remote() {
 }
 
 # ── Main loop ────────────────────────────────────────────────────────────────
+#
+# _run_one dispatches one seed (remote or local) and writes a status line to
+# $STATUS_DIR/seed_${seed}.status. Parallel mode runs up to $PARALLEL workers
+# concurrently using bash job control; the status files are read after
+# `wait` to tally failures (avoids races on a shared FAILED_SEEDS array).
 
-FAILED_SEEDS=()
+STATUS_DIR="$(mktemp -d -t autoplay-batch-status.XXXXXX)"
+trap 'rm -rf "$STATUS_DIR"' EXIT
 
-for seed in $(seq 1 "$COUNT"); do
-    game_dir="$RESULTS_DIR/game_${STAMP}_seed${seed}"
+_run_one() {
+    local seed="$1"
+    local game_dir="$RESULTS_DIR/game_${STAMP}_seed${seed}"
     mkdir -p "$game_dir"
-    echo ""
-    echo "[$(date +%H:%M:%S)] === Game $seed/$COUNT (seed=$seed) ==="
-    echo "[seed $seed] Output dir: $game_dir"
+    echo "[$(date +%H:%M:%S)] [seed $seed] start → $game_dir"
 
     if [ -n "$AUTOPLAY_HOST" ]; then
         _run_remote "$seed" "$game_dir"
@@ -200,24 +222,47 @@ for seed in $(seq 1 "$COUNT"); do
         _run_local "$seed" "$game_dir"
     fi
 
-    # Check for meta.json + non-empty turn_stats.jsonl as canonical success indicators
-    meta_ok=false
-    stats_ok=false
+    local meta_ok=false stats_ok=false
     [ -f "$game_dir/meta.json" ] && meta_ok=true
     [ -f "$game_dir/turn_stats.jsonl" ] && [ -s "$game_dir/turn_stats.jsonl" ] && stats_ok=true
 
     if $meta_ok && $stats_ok; then
+        local line_count
         line_count="$(wc -l < "$game_dir/turn_stats.jsonl" | tr -d ' ')"
-        echo "[seed $seed] OK — meta.json present, turn_stats.jsonl has $line_count line(s)"
+        echo "[$(date +%H:%M:%S)] [seed $seed] OK — $line_count turn_stats line(s)"
+        echo "OK $seed" > "$STATUS_DIR/seed_${seed}.status"
     else
-        if ! $meta_ok; then
-            echo "[seed $seed] MISSING meta.json" >&2
-        fi
-        if ! $stats_ok; then
-            echo "[seed $seed] MISSING or empty turn_stats.jsonl (game may have crashed)" >&2
-        fi
-        FAILED_SEEDS+=("$seed")
+        $meta_ok || echo "[seed $seed] MISSING meta.json" >&2
+        $stats_ok || echo "[seed $seed] MISSING or empty turn_stats.jsonl" >&2
+        echo "FAIL $seed" > "$STATUS_DIR/seed_${seed}.status"
     fi
+}
+
+if [ "$PARALLEL" -le 1 ]; then
+    for seed in $(seq 1 "$COUNT"); do
+        _run_one "$seed"
+    done
+else
+    echo "[$(date +%H:%M:%S)] Dispatching $COUNT seed(s) with up to $PARALLEL concurrent..."
+    for seed in $(seq 1 "$COUNT"); do
+        while [ "$(jobs -rp | wc -l | tr -d ' ')" -ge "$PARALLEL" ]; do
+            wait -n 2>/dev/null || break
+        done
+        _run_one "$seed" &
+    done
+    wait
+fi
+
+FAILED_SEEDS=()
+for seed in $(seq 1 "$COUNT"); do
+    status_file="$STATUS_DIR/seed_${seed}.status"
+    if [ ! -f "$status_file" ]; then
+        echo "[seed $seed] MISSING status file (worker crashed before writing)" >&2
+        FAILED_SEEDS+=("$seed")
+        continue
+    fi
+    read -r status _ < "$status_file"
+    [ "$status" = "OK" ] || FAILED_SEEDS+=("$seed")
 done
 
 # ── Summary ──────────────────────────────────────────────────────────────────