#!/usr/bin/env bash
# apricot-run.sh — Isolated build + batch pipeline on apricot.
#
# apricot is a multi-tenant RUN host. We do NOT touch ~/Code/@projects/@magic-civilization
# on apricot — that's another developer's workspace. Instead:
#
#   1. Rsync this EDIT-host source tree to $HOME/.cache/mc-src-<stamp>/ on apricot.
#   2. Build (cargo) inside that per-run scratch tree.
#   3. Run the batch with RESULTS_DIR under $HOME/.cache/mc-batches/<stamp>/
#      (persistent, XDG cache convention, flatpak-visible via --filesystem=home).
#   4. Fetch verdict JSON back to EDIT host for review.
#
# Usage:
#   scripts/apricot-run.sh smoke [seeds=10] [turns=300]
#   scripts/apricot-run.sh clan <clan_id> [seeds=10] [turns=300]
#   scripts/apricot-run.sh gpu-walltime [seeds=10] [turns=300]
#
# Environment:
#   APRICOT_SSH_ALIAS — ssh alias for the RUN host (default: apricot).
#   STAMP             — override the timestamp (for reproducing a specific run).
#   PARALLEL          — explicit number of concurrent instances (skips the policy below).
#   RAYON_NUM_THREADS — explicit per-instance thread count (skips the policy below).
#   USE_MAX_CORES     — "true" → PARALLEL = min(seed count, nproc on the RUN host).
#   MIN_CORES         — PARALLEL when USE_MAX_CORES is not "true" (default: 4).
#
# Exit codes: 2 for usage/configuration errors detected by this script;
# otherwise the status of the first failing command (set -e).

set -euo pipefail

# die MESSAGE... — report a usage/configuration error on stderr and exit 2.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 2
}

# require_uint NAME VALUE — abort unless VALUE is a non-negative decimal integer.
# The values checked here are later used in shell arithmetic, where anything
# else either errors out cryptically or silently evaluates to nonsense.
require_uint() {
  [[ "$2" =~ ^[0-9]+$ ]] || die "$1 must be a non-negative integer, got '$2'"
}

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
APRICOT="${APRICOT_SSH_ALIAS:-apricot}"
STAMP="${STAMP:-$(date +%Y%m%d_%H%M%S)}"

# ── Load .env / .env.local so USE_MAX_CORES + MIN_CORES + AI_GPU_ROLLOUT
# propagate into the resource policy below. .env.local wins over .env
# because it is sourced second.
for envfile in "${PROJECT_DIR}/.env" "${PROJECT_DIR}/.env.local"; do
  if [[ -f "${envfile}" ]]; then
    set -a # auto-export everything the env file assigns
    # shellcheck disable=SC1090 — path is computed at runtime
    source "${envfile}"
    set +a
  fi
done

# MODE + positional args resolved early so the resource-policy block can
# peek at the seed count (which differs per mode — for `clan` it's $2
# because $1 is the clan_id; for smoke/gpu-walltime it's $1).
MODE="${1:?usage: apricot-run.sh <mode> [args]}"
shift || true

# Validate the mode (and clan's mandatory clan_id) NOW, before any rsync or
# build work is spent on an invocation that can only fail at the batch step.
case "${MODE}" in
  smoke | gpu-walltime)
    _seed_count_peek="${1:-10}"
    ;;
  clan)
    : "${1:?usage: apricot-run.sh clan <clan_id> [seeds] [turns]}"
    _seed_count_peek="${2:-10}" # $1 is clan_id, $2 is seeds
    ;;
  *)
    die "unknown mode '${MODE}'"
    ;;
esac

# ── Resource policy for PARALLEL + RAYON_NUM_THREADS ─────────────────
# Each Godot instance spawns its own rayon thread pool for MCTS rollouts;
# rayon defaults to nproc unless RAYON_NUM_THREADS is set. If PARALLEL
# instances each claim all nproc threads, we get PARALLEL*nproc threads
# fighting over nproc cores → thrashing, each process effectively single
# core. Better: PARALLEL = number of seeds (one instance each), and
# RAYON_NUM_THREADS = nproc / PARALLEL so the box is saturated evenly.

# Core count of the RUN host; fall back to 8 when ssh fails or returns
# anything other than a positive integer (e.g. a login banner on stdout).
NPROC="$(ssh "${APRICOT}" nproc 2>/dev/null || echo 8)"
if [[ ! "${NPROC}" =~ ^[1-9][0-9]*$ ]]; then
  NPROC=8
fi

if [[ -n "${PARALLEL:-}" ]]; then
  PARALLEL_EFFECTIVE="${PARALLEL}"
  PARALLEL_SOURCE="env override"
elif [[ "${USE_MAX_CORES:-false}" == "true" ]]; then
  # One instance per seed — up to NPROC. More instances than that
  # would queue serially anyway (NPROC concurrent Godots max).
  require_uint "seed count" "${_seed_count_peek}"
  _seed_count_peek="$((10#${_seed_count_peek}))" # force base 10 ("08" is not octal)
  PARALLEL_EFFECTIVE="$((_seed_count_peek < NPROC ? _seed_count_peek : NPROC))"
  PARALLEL_SOURCE="USE_MAX_CORES=true → min(seeds=${_seed_count_peek}, nproc=${NPROC})"
else
  PARALLEL_EFFECTIVE="${MIN_CORES:-4}"
  PARALLEL_SOURCE="MIN_CORES default"
fi
require_uint "PARALLEL (${PARALLEL_SOURCE})" "${PARALLEL_EFFECTIVE}"
PARALLEL_EFFECTIVE="$((10#${PARALLEL_EFFECTIVE}))"
export PARALLEL="${PARALLEL_EFFECTIVE}"

# RAYON_NUM_THREADS per Godot instance = fair share of cores, never below 1.
if [[ -n "${RAYON_NUM_THREADS:-}" ]]; then
  require_uint RAYON_NUM_THREADS "${RAYON_NUM_THREADS}"
  RAYON_NUM_THREADS="$((10#${RAYON_NUM_THREADS}))"
  RAYON_SOURCE="env override"
else
  if ((PARALLEL_EFFECTIVE > 0)); then
    RAYON_NUM_THREADS="$((NPROC / PARALLEL_EFFECTIVE))"
  else
    RAYON_NUM_THREADS=1
  fi
  if ((RAYON_NUM_THREADS < 1)); then
    RAYON_NUM_THREADS=1
  fi
  RAYON_SOURCE="nproc(${NPROC}) / PARALLEL(${PARALLEL_EFFECTIVE})"
fi
export RAYON_NUM_THREADS

# Source + build scratch lives under $HOME/.cache (flatpak-visible via
# --filesystem=home). /tmp was tried first but flatpak's sandbox can't see
# /tmp, so Godot rejected the --path argument with "Invalid project path".
# $HOME/.cache/ still satisfies the apricot-isolation rule (not under ~/Code,
# not shared with other devs) and is convention-cleanable.
# Resolve $HOME on apricot (one ssh round-trip) so SCRATCH_ABS / RESULTS_ABS
# are fully-qualified paths on that host.
REMOTE_HOME="$(ssh "${APRICOT}" 'printf %s "$HOME"')"

# The rsync below runs with --delete against a path built from this value, so
# refuse anything that is not a single-line absolute path (empty output, a
# login banner leaking onto stdout, ...).
if [[ "${REMOTE_HOME}" != /* || "${REMOTE_HOME}" == *$'\n'* ]]; then
  echo "ERROR: could not resolve \$HOME on ${APRICOT} (got '${REMOTE_HOME}')" >&2
  exit 1
fi

SCRATCH_ABS="${REMOTE_HOME}/.cache/mc-src-${STAMP}"     # per-run source + build scratch
RESULTS_ABS="${REMOTE_HOME}/.cache/mc-batches/${STAMP}" # persistent batch output

echo "============================================================"
echo "apricot-run.sh mode=${MODE} stamp=${STAMP}"
echo " EDIT host: $(hostname)"
echo " RUN host: ${APRICOT}"
echo " SCRATCH: ${SCRATCH_ABS} (per-run source + build scratch)"
echo " RESULTS: ${RESULTS_ABS} (persistent batch output)"
echo " PARALLEL: ${PARALLEL_EFFECTIVE} (source: ${PARALLEL_SOURCE})"
echo " RAYON_NUM_THREADS/instance: ${RAYON_NUM_THREADS} (source: ${RAYON_SOURCE})"
echo " Total CPU saturation: ${PARALLEL_EFFECTIVE} × ${RAYON_NUM_THREADS} = $((PARALLEL_EFFECTIVE * RAYON_NUM_THREADS))/${NPROC} cores"
# smoke/clan fall back to AI_GPU_ROLLOUT=false when the variable is unset;
# gpu-walltime ignores it and runs once with true and once with false.
echo " AI_GPU_ROLLOUT: ${AI_GPU_ROLLOUT:-false (default off for smoke/clan)}"
echo "============================================================"

# ── Step 1: rsync EDIT → SCRATCH ─────────────────────────────────────────────
# Mirror the project tree into the scratch dir, skipping VCS metadata, local
# build/iteration output, dependency caches and macOS binaries.
echo "[$(date +%H:%M:%S)] rsync EDIT source → ${SCRATCH_ABS}..."
rsync -a --delete \
  --exclude='.git' \
  --exclude='.local/build' \
  --exclude='.local/iter' \
  --exclude='.local/batches' \
  --exclude='node_modules' \
  --exclude='target' \
  --exclude='*.dylib' \
  "${PROJECT_DIR}/" "${APRICOT}:${SCRATCH_ABS}/"

# ── Step 2: build + deploy via build-gdext.sh ────────────────────────────────
# Canonical build script: runs `cargo build --release --target x86_64-unknown-linux-gnu`
# AND copies the output from .local/build/rust/$TARGET/release/libmagic_civ_physics_gdext.so
# into src/game/engine/addons/magic_civ_physics/libmagic_civ_physics.x86_64.so
# with the name Godot's .gdextension file expects.
echo "[$(date +%H:%M:%S)] build-gdext.sh x86_64-unknown-linux-gnu (in SCRATCH)..."
# Build on the RUN host with the complete, unfiltered log.
# Do NOT append `| tail -N` here: the exit status seen through ssh would be
# tail's 0, hiding a build-gdext.sh failure. That is how a stale April-16 .so
# ran for 2 hours on 2026-04-17 while bullets 3-5 of p0-32 stayed ✗ with no
# visible warning.
_remote_build_cmd="set -euo pipefail; cd '${SCRATCH_ABS}/src/simulator' && bash build-gdext.sh x86_64-unknown-linux-gnu"
ssh "${APRICOT}" "${_remote_build_cmd}"

# Remove any macOS .dylib from the addon directory — flatpak Godot on Linux
# shouldn't try to load a macOS binary even though the .gdextension config
# only points at it for macos.* target strings.
# NOTE(review): Step 1's rsync already excludes '*.dylib', so on a fresh
# scratch tree this should be a no-op; confirm whether it is still needed.
_stale_dylib="${SCRATCH_ABS}/src/game/engine/addons/magic_civ_physics/libmagic_civ_physics.dylib"
ssh "${APRICOT}" "rm -f '${_stale_dylib}'"

# ── Step 3: populate .godot/global_script_class_cache.cfg via editor pre-pass ──
# A freshly synced scratch tree has an empty .godot/ cache, and Godot's
# class_name resolution (the `as Weather` / `as SplitPanelContainer` idiom)
# depends on that cache being filled by an editor-mode scan. Without it the
# headless autoplay cascades: "Could not find type X" → "Compilation failed"
# → turn_manager.gd fails to load.
#
# `--editor --quit` opens, imports, scans class_names, writes the cache and
# exits (expected exit code: 0). Afterwards we check that the cache file
# exists and is non-empty.
printf '[%s] editor pre-pass to populate .godot/ class cache...\n' "$(date +%H:%M:%S)"
# Editor pre-pass on the RUN host: run Godot headless in editor mode, show the
# last 5 log lines, then report whether the class cache file is non-empty.
# A missing cache only produces a warning; a failing Godot run aborts the
# script (remote pipefail + local set -e).
ssh "${APRICOT}" "set -euo pipefail; \
  flatpak run --user --filesystem=home --command=godot \
    org.godotengine.Godot --headless --editor --quit \
    --path '${SCRATCH_ABS}/src/game' 2>&1 | tail -5; \
  if test -s '${SCRATCH_ABS}/src/game/.godot/global_script_class_cache.cfg'; then \
    echo ' ✓ class cache populated'; \
  else \
    echo ' ⚠ class cache missing — headless autoplay may still cascade'; \
  fi"

# ── Step 4: run the batch per MODE ───────────────────────────────────────────
ssh "${APRICOT}" "mkdir -p '${RESULTS_ABS}'"

#######################################
# Run tools/autoplay-batch.sh inside the scratch tree on the RUN host.
# Globals:   APRICOT, SCRATCH_ABS, RESULTS_ABS, PARALLEL, RAYON_NUM_THREADS,
#            SEEDS, TURNS (all read)
# Arguments: $1 - extra VAR=value assignments for the remote command
#            $2 - results sub-directory name under RESULTS_ABS
#            $3 - number of trailing log lines to show
# Outputs:   the last $3 lines of the batch's combined stdout/stderr
# Returns:   non-zero if the remote pipeline fails (remote pipefail is on,
#            so `tail` does not mask a batch failure)
#######################################
run_remote_batch() {
  local extra_env=$1 out_name=$2 tail_lines=$3
  # PARALLEL and RAYON_NUM_THREADS are passed explicitly: exporting them
  # locally does not put them in the remote environment, so without this the
  # per-instance thread cap computed by the resource policy never reaches the
  # batch.
  ssh "${APRICOT}" "set -euo pipefail; cd '${SCRATCH_ABS}' && \
    AI_USE_MCTS=true ${extra_env} \
    PARALLEL=${PARALLEL} RAYON_NUM_THREADS=${RAYON_NUM_THREADS} \
    bash tools/autoplay-batch.sh '${SEEDS}' '${TURNS}' '${RESULTS_ABS}/${out_name}' 2>&1 | tail -n ${tail_lines}"
}

# AI_GPU_ROLLOUT defaults to false for smoke/clan. The GPU integration
# (p0-20 task #10) is parity-verified on isolated rollouts, but enabling it
# in a 2-player smoke produced a deterministic "P0 always wins at T11-T18,
# P1 never founds" regression on 2026-04-18. Opt in via env override;
# gpu-walltime flips it per iteration as its explicit comparison.
case "${MODE}" in
  smoke)
    SEEDS="${1:-10}"
    TURNS="${2:-300}"
    GPU_ENV="AI_GPU_ROLLOUT=${AI_GPU_ROLLOUT:-false}"
    echo "[$(date +%H:%M:%S)] smoke batch: ${SEEDS} seeds T${TURNS} PARALLEL=${PARALLEL} ${GPU_ENV}"
    run_remote_batch "${GPU_ENV}" "smoke" 30
    ;;
  clan)
    CLAN="${1:?usage: apricot-run.sh clan <clan_id> [seeds] [turns]}"
    SEEDS="${2:-10}"
    TURNS="${3:-300}"
    GPU_ENV="AI_GPU_ROLLOUT=${AI_GPU_ROLLOUT:-false}"
    echo "[$(date +%H:%M:%S)] clan=${CLAN} batch: ${SEEDS} seeds T${TURNS} PARALLEL=${PARALLEL} ${GPU_ENV}"
    run_remote_batch "AI_PIN_PERSONALITY='${CLAN}' ${GPU_ENV}" "clan-${CLAN}" 30
    ;;
  gpu-walltime)
    SEEDS="${1:-10}"
    TURNS="${2:-300}"
    echo "[$(date +%H:%M:%S)] GPU wall-time comparison: ${SEEDS} seeds T${TURNS}"
    for GPU in true false; do
      echo " --- AI_GPU_ROLLOUT=${GPU} ---"
      run_remote_batch "AI_GPU_ROLLOUT=${GPU}" "gpu-${GPU}" 10
    done
    ;;
  *)
    echo "ERROR: unknown mode '${MODE}'" >&2
    exit 2
    ;;
esac

# ── Step 5: fetch results summary back to EDIT ───────────────────────────────
LOCAL_RESULTS="${PROJECT_DIR}/.local/iter/apricot-${STAMP}"
mkdir -p "${LOCAL_RESULTS}"
echo "[$(date +%H:%M:%S)] fetch verdict/summary to ${LOCAL_RESULTS}..."
# Best-effort: a failed copy is reported but does not fail the run, since the
# results still exist on the RUN host. scp's own diagnostics are left visible
# so the warning comes with a reason.
if ! scp -r "${APRICOT}:${RESULTS_ABS}/" "${LOCAL_RESULTS}/"; then
  echo "WARN: scp returned non-zero; check manually on ${APRICOT}:${RESULTS_ABS}" >&2
fi

# ── Step 6: prune old local copies — keep only the 3 most recent ─────────────
ITER_ROOT="${PROJECT_DIR}/.local/iter"
if [[ -d "${ITER_ROOT}" ]]; then
  # List apricot-* dirs newest-first (by mtime), skip the first 3, delete the rest.
  mapfile -t OLD_RUNS < <(ls -1dt -- "${ITER_ROOT}"/apricot-* 2>/dev/null | tail -n +4)
  if ((${#OLD_RUNS[@]} > 0)); then
    echo "[$(date +%H:%M:%S)] pruning ${#OLD_RUNS[@]} old local run(s) (keeping 3 newest)..."
    for d in "${OLD_RUNS[@]}"; do
      echo " rm -rf ${d}"
      # ${d:?} aborts instead of expanding to an empty path.
      rm -rf -- "${d:?}"
    done
  fi
fi

echo "============================================================"
echo "DONE. Scratch at ${APRICOT}:${SCRATCH_ABS} (per-run, .cache)."
echo "Results at ${APRICOT}:${RESULTS_ABS} (persistent, .cache)."
# Closing banner: where the fetched copy of the results lives on the EDIT host.
printf '%s\n' \
  "Local copy at ${LOCAL_RESULTS}" \
  "============================================================"