feat(@projects): ✨ add parallel execution support
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
626b20b673
commit
1a31596a94
1 changed files with 67 additions and 22 deletions
|
|
@ -19,6 +19,9 @@
|
|||
# RENDER_MODE — "headless" (default) or "weston". --weston flag sets this.
|
||||
# headless: Godot --headless, no display, no screenshots.
|
||||
# weston: weston headless backend, software rendering, screenshots work.
|
||||
# PARALLEL — Max seeds to run concurrently (default 1 = serial).
|
||||
# Remote runner is concurrency-safe via scoped pkill per AUTO_PLAY_DIR.
|
||||
# Apricot has 64 cores → PARALLEL=10 is a safe, ~10× wall-clock speedup.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
|
|
@ -55,6 +58,12 @@ fi
|
|||
|
||||
AUTOPLAY_HOST="${AUTOPLAY_HOST:-}"
|
||||
SAFETY_TIMEOUT=$(( TURN_LIMIT * 2 + 300 ))
|
||||
PARALLEL="${PARALLEL:-1}"
|
||||
|
||||
if ! [[ "$PARALLEL" =~ ^[0-9]+$ ]] || [ "$PARALLEL" -lt 1 ]; then
|
||||
echo "ERROR: PARALLEL must be a positive integer (got '$PARALLEL')" >&2
|
||||
exit 2
|
||||
fi
|
||||
|
||||
# Flatpak sandbox can't write to /tmp. Reject /tmp paths outright instead of
|
||||
# silently redirecting — persistent output belongs under the repo.
|
||||
|
|
@ -78,9 +87,16 @@ else
|
|||
echo "Mode: local flatpak"
|
||||
fi
|
||||
echo "Render: $RENDER_MODE"
|
||||
echo "Parallel: $PARALLEL concurrent seed(s)"
|
||||
echo "Safety timeout: ${SAFETY_TIMEOUT}s per game"
|
||||
echo "============================================================"
|
||||
|
||||
# Resolve REMOTE_HOME once upfront: every parallel worker needs it, and letting each worker resolve it concurrently is racy.
|
||||
if [ -n "$AUTOPLAY_HOST" ]; then
|
||||
REMOTE_HOME="$(ssh "$AUTOPLAY_HOST" 'echo "$HOME"')"
|
||||
export REMOTE_HOME
|
||||
fi
|
||||
|
||||
_kill_stale_procs() {
|
||||
pkill -f "weston.*godot-headless" 2>/dev/null || true
|
||||
pkill -f "org.godotengine.Godot" 2>/dev/null || true
|
||||
|
|
@ -96,7 +112,11 @@ _run_local() {
|
|||
exit 1
|
||||
fi
|
||||
|
||||
_kill_stale_procs
|
||||
# Skip unscoped pkill in parallel mode — would murder sibling workers.
|
||||
# Parallel local runs assume no stray Godot is already running.
|
||||
if [ "$PARALLEL" -le 1 ]; then
|
||||
_kill_stale_procs
|
||||
fi
|
||||
|
||||
local WESTON_PID=""
|
||||
local FLATPAK_ENVS=(
|
||||
|
|
@ -151,10 +171,7 @@ _run_remote() {
|
|||
|
||||
echo "[seed $seed] Running via SSH on $AUTOPLAY_HOST..."
|
||||
|
||||
# Resolve remote $HOME once so we don't fight quoting rules
|
||||
if [ -z "${REMOTE_HOME:-}" ]; then
|
||||
REMOTE_HOME="$(ssh "$AUTOPLAY_HOST" 'echo "$HOME"')"
|
||||
fi
|
||||
# REMOTE_HOME is resolved once at startup (before any worker is dispatched) and exported.
|
||||
local remote_game_dir="$REMOTE_HOME/Code/@projects/@magic-civilization/.local/batches/autoplay_batch/game_${STAMP}_seed${seed}"
|
||||
local remote_runner="$REMOTE_HOME/bin/run_ap3.sh"
|
||||
|
||||
|
|
@ -184,15 +201,20 @@ _run_remote() {
|
|||
}
|
||||
|
||||
# ── Main loop ────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# _run_one dispatches one seed (remote or local) and writes a status line to
|
||||
# $STATUS_DIR/seed_<N>.status. Parallel mode runs up to $PARALLEL workers
|
||||
# concurrently as background jobs (throttled via `jobs -rp` and `wait -n`); the status files are read after
|
||||
# `wait` to tally failures (background workers run in subshells, so their appends to FAILED_SEEDS would never reach the parent shell).
|
||||
|
||||
FAILED_SEEDS=()
|
||||
STATUS_DIR="$(mktemp -d -t autoplay-batch-status.XXXXXX)"
|
||||
trap 'rm -rf "$STATUS_DIR"' EXIT
|
||||
|
||||
for seed in $(seq 1 "$COUNT"); do
|
||||
game_dir="$RESULTS_DIR/game_${STAMP}_seed${seed}"
|
||||
_run_one() {
|
||||
local seed="$1"
|
||||
local game_dir="$RESULTS_DIR/game_${STAMP}_seed${seed}"
|
||||
mkdir -p "$game_dir"
|
||||
echo ""
|
||||
echo "[$(date +%H:%M:%S)] === Game $seed/$COUNT (seed=$seed) ==="
|
||||
echo "[seed $seed] Output dir: $game_dir"
|
||||
echo "[$(date +%H:%M:%S)] [seed $seed] start → $game_dir"
|
||||
|
||||
if [ -n "$AUTOPLAY_HOST" ]; then
|
||||
_run_remote "$seed" "$game_dir"
|
||||
|
|
@ -200,24 +222,47 @@ for seed in $(seq 1 "$COUNT"); do
|
|||
_run_local "$seed" "$game_dir"
|
||||
fi
|
||||
|
||||
# Check for meta.json + non-empty turn_stats.jsonl as canonical success indicators
|
||||
meta_ok=false
|
||||
stats_ok=false
|
||||
local meta_ok=false stats_ok=false
|
||||
[ -f "$game_dir/meta.json" ] && meta_ok=true
|
||||
[ -f "$game_dir/turn_stats.jsonl" ] && [ -s "$game_dir/turn_stats.jsonl" ] && stats_ok=true
|
||||
|
||||
if $meta_ok && $stats_ok; then
|
||||
local line_count
|
||||
line_count="$(wc -l < "$game_dir/turn_stats.jsonl" | tr -d ' ')"
|
||||
echo "[seed $seed] OK — meta.json present, turn_stats.jsonl has $line_count line(s)"
|
||||
echo "[$(date +%H:%M:%S)] [seed $seed] OK — $line_count turn_stats line(s)"
|
||||
echo "OK $seed" > "$STATUS_DIR/seed_${seed}.status"
|
||||
else
|
||||
if ! $meta_ok; then
|
||||
echo "[seed $seed] MISSING meta.json" >&2
|
||||
fi
|
||||
if ! $stats_ok; then
|
||||
echo "[seed $seed] MISSING or empty turn_stats.jsonl (game may have crashed)" >&2
|
||||
fi
|
||||
FAILED_SEEDS+=("$seed")
|
||||
$meta_ok || echo "[seed $seed] MISSING meta.json" >&2
|
||||
$stats_ok || echo "[seed $seed] MISSING or empty turn_stats.jsonl" >&2
|
||||
echo "FAIL $seed" > "$STATUS_DIR/seed_${seed}.status"
|
||||
fi
|
||||
}
|
||||
|
||||
if [ "$PARALLEL" -le 1 ]; then
|
||||
for seed in $(seq 1 "$COUNT"); do
|
||||
_run_one "$seed"
|
||||
done
|
||||
else
|
||||
echo "[$(date +%H:%M:%S)] Dispatching $COUNT seed(s) with up to $PARALLEL concurrent..."
|
||||
for seed in $(seq 1 "$COUNT"); do
|
||||
while [ "$(jobs -rp | wc -l | tr -d ' ')" -ge "$PARALLEL" ]; do
|
||||
wait -n 2>/dev/null || break
|
||||
done
|
||||
_run_one "$seed" &
|
||||
done
|
||||
wait
|
||||
fi
|
||||
|
||||
FAILED_SEEDS=()
|
||||
for seed in $(seq 1 "$COUNT"); do
|
||||
status_file="$STATUS_DIR/seed_${seed}.status"
|
||||
if [ ! -f "$status_file" ]; then
|
||||
echo "[seed $seed] MISSING status file (worker crashed before writing)" >&2
|
||||
FAILED_SEEDS+=("$seed")
|
||||
continue
|
||||
fi
|
||||
read -r status _ < "$status_file"
|
||||
[ "$status" = "OK" ] || FAILED_SEEDS+=("$seed")
|
||||
done
|
||||
|
||||
# ── Summary ──────────────────────────────────────────────────────────────────
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue