feat(@projects/@magic-civilization): add async smoke test script

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
Natalie 2026-05-05 14:12:23 -04:00
parent 2e331d2b07
commit 6216d97a76
3 changed files with 375 additions and 2 deletions

81
scripts/apricot-async-smoke.sh Executable file
View file

@ -0,0 +1,81 @@
#!/usr/bin/env bash
# apricot-async-smoke.sh — End-to-end smoke for the p2-64 launch/status/fetch protocol.
#
# Exercises a tiny batch (smoke 1 50) through the full async loop:
# 1. launch → bare stamp on stdout
# 2. status → valid JSON immediately, state in {running, unreachable}
# 3. wait loop → poll until state==complete (or fail)
# 4. fetch → rsync results to .local/iter/<stamp>/
# 5. verify → at least one game_*/turn_stats.jsonl present locally
#
# Skips gracefully (exit 0) if apricot is unreachable, so this can run on plum
# without blocking when the RUN host is offline.
#
# Usage:
# bash scripts/apricot-async-smoke.sh # default smoke 1 50
# POLL_TIMEOUT_S=600 bash scripts/apricot-async-smoke.sh # extend the wait
set -euo pipefail

# Directory holding this script; apricot-run.sh is expected to sit next to it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
APRICOT="${APRICOT_SSH_ALIAS:-apricot}"
POLL_TIMEOUT_S="${POLL_TIMEOUT_S:-900}"
POLL_INTERVAL_S="${POLL_INTERVAL_S:-15}"

# Timestamped diagnostics on stderr; stdout stays clean.
log() { printf '[%s] %s\n' "$(date +%H:%M:%S)" "$*" >&2; }

# ── Reachability gate: skip if apricot can't be reached at all. ──────────────
if ! ssh -o ConnectTimeout=5 -o BatchMode=yes "$APRICOT" 'echo ok' >/dev/null 2>&1; then
  log "apricot unreachable; skipping smoke (exit 0)"
  exit 0
fi

# ── Step 1: launch ───────────────────────────────────────────────────────────
log "launching smoke 1 50 …"
# Explicit failure branch: without it `set -e` would abort here silently.
STAMP="$(bash "$SCRIPT_DIR/apricot-run.sh" launch smoke 1 50)" || {
  log "FAIL: launch exited non-zero"
  exit 1
}
if [[ -z "$STAMP" ]]; then
  log "FAIL: launch returned empty stamp"
  exit 1
fi
log "launched stamp=$STAMP"

# ── Step 2: status (must be valid JSON, must mention the stamp) ──────────────
STATUS_JSON="$(bash "$SCRIPT_DIR/apricot-run.sh" status "$STAMP")"
log "initial status: $STATUS_JSON"
case "$STATUS_JSON" in
  *"\"stamp\":\"$STAMP\""*) ;;
  *) log "FAIL: status JSON missing stamp field"; exit 1 ;;
esac

# ── Step 3: poll until complete | failed (with timeout) ──────────────────────
log "polling every ${POLL_INTERVAL_S}s up to ${POLL_TIMEOUT_S}s …"
DEADLINE=$(( $(date +%s) + POLL_TIMEOUT_S ))
STATE="running"
while (( $(date +%s) < DEADLINE )); do
  # A failed probe is not fatal: treat it like any other non-terminal state
  # and try again on the next tick.
  STATUS_JSON="$(bash "$SCRIPT_DIR/apricot-run.sh" status "$STAMP" || true)"
  STATE="$(sed -n 's/.*"state":"\([^"]*\)".*/\1/p' <<<"$STATUS_JSON")"
  log "state=$STATE ($STATUS_JSON)"
  case "$STATE" in
    complete) break ;;
    failed) log "FAIL: batch failed; journalctl --user -u mc-batch-$STAMP on $APRICOT"; exit 1 ;;
  esac
  sleep "$POLL_INTERVAL_S"
done
if [[ "$STATE" != "complete" ]]; then
  log "FAIL: did not reach complete within ${POLL_TIMEOUT_S}s (last state=$STATE)"
  exit 1
fi

# ── Step 4: fetch ────────────────────────────────────────────────────────────
LOCAL_DEST="$(bash "$SCRIPT_DIR/apricot-run.sh" fetch "$STAMP")" || {
  log "FAIL: fetch exited non-zero for stamp=$STAMP"
  exit 1
}
log "fetched to: $LOCAL_DEST"

# ── Step 5: verify result presence ───────────────────────────────────────────
# Capture find's output instead of piping into `grep -q`: under `pipefail` an
# early-exiting grep can kill find with SIGPIPE, which made the old pipeline
# report "not found" even though a match had been seen. `|| true` keeps a
# missing/unreadable destination on the explicit FAIL path below.
FOUND_STATS="$(find "$LOCAL_DEST" -path '*/game_*/turn_stats.jsonl' -type f || true)"
if [[ -z "$FOUND_STATS" ]]; then
  log "FAIL: no turn_stats.jsonl found under $LOCAL_DEST"
  exit 1
fi
log "OK — async protocol smoke passed for stamp=$STAMP"

View file

@ -15,11 +15,41 @@
# 5. Fetch verdict JSON back to EDIT host for review.
# 6. Remove the worktree (canonical + objects retained for next run).
#
# Usage:
# ── Synchronous usage (block until done, fetch results inline) ──────────────
# scripts/apricot-run.sh smoke [seeds=10] [turns=300]
# scripts/apricot-run.sh clan <clan_id> [seeds=10] [turns=300]
# scripts/apricot-run.sh gpu-walltime [seeds=10] [turns=300]
#
# ── Async protocol (p2-64): launch / status / fetch ─────────────────────────
# Decouples job lifecycle from the orchestrating ssh. The systemd --user unit
# on apricot owns build+batch and survives ssh disconnect, sleep/wake, network
# blips. EDIT host polls via short ConnectTimeout=5 probes.
#
# STAMP=$(scripts/apricot-run.sh launch smoke 1 50) # bare stdout = stamp
# scripts/apricot-run.sh status "$STAMP" # one-line JSON
# # → {"stamp":"...","state":"running|complete|failed|unreachable",
# # "seeds_done":N,"seeds_total":M,"completion_marker":bool}
# scripts/apricot-run.sh fetch "$STAMP" # rsync results
#
# Loop pattern:
# STAMP=$(scripts/apricot-run.sh launch smoke 10 300)
# while STATE=$(scripts/apricot-run.sh status "$STAMP" | jq -r .state); \
# [[ $STATE != complete ]]; do
# [[ $STATE == failed ]] && exit 1
# sleep 60
# done
# scripts/apricot-run.sh fetch "$STAMP"
#
# Implementation notes:
# • launch writes a per-stamp launcher.sh into ~/.cache/mc-batches/<stamp>/
# and starts it under `systemd-run --user --collect --unit=mc-batch-<stamp>`.
# • The launcher does git fetch, worktree add, build-gdext, autoplay-batch,
# then `touch <submode>/completion.marker` ONLY on success.
# • status uses a single ssh ConnectTimeout=5 with three lightweight probes
# (systemctl is-active, marker count, turn_stats count) plus a one-line
# read of seeds_total — no piped reads of result-file contents (we hit
# channel saturation with that historically).
# • fetch is rsync -a --partial; resumable across drops.
#
# Environment:
# APRICOT_SSH_ALIAS — ssh alias for the RUN host (default: apricot).
# STAMP — override the timestamp (for reproducing a specific run).
@ -45,9 +75,252 @@ done
# MODE + positional args resolved early so the resource-policy block can
# peek at the seed count (which differs per mode — for `clan` it's $2
# because $1 is the clan_id; for smoke/gpu-walltime it's $1).
# `${1:?…}` aborts with the usage text when no mode is given, so that text
# must list every accepted mode, including the async launch/status/fetch.
MODE="${1:?usage: apricot-run.sh <smoke|clan|difficulty|difficulty-asym|gpu-walltime|launch|status|fetch> [args]}"
shift || true
# ── p2-64 async protocol: launch / status / fetch ────────────────────────────
# These three sub-modes decouple the batch lifecycle from the orchestrating ssh.
# See the header comment for the full protocol shape and example loop.

#######################################
# Print the seed count a launch will run, peeked from the sub-mode's args.
# Arguments: $1 - sub-mode name; $2.. - the sub-mode's positional args
# Outputs:   the seed-count argument, or the per-mode default when omitted
#######################################
launch_seeds_total() {
  local submode="$1"
  shift
  case "${submode}" in
    clan|clan-priors|difficulty) printf '%s\n' "${2:-10}" ;;  # $1 is the clan/tier id
    difficulty-asym)             printf '%s\n' "${3:-10}" ;;  # $1/$2 are the two tiers
    matchup-grid|huge-map-5clan) printf '%s\n' "${1:-5}" ;;
    ai-quality-baseline*)        printf '%s\n' "${1:-50}" ;;
    *)                           printf '%s\n' "${1:-10}" ;;  # smoke, gpu-walltime, …
  esac
}

#######################################
# Print the results sub-directory name used for a sub-mode.
# Arguments: $1 - sub-mode name; $2.. - the sub-mode's positional args
# Outputs:   directory name relative to ~/.cache/mc-batches/<stamp>/
#######################################
launch_results_subdir() {
  local submode="$1"
  shift
  case "${submode}" in
    clan)                 printf 'clan-%s\n' "${1:-unknown}" ;;
    clan-priors)          printf 'clan-priors-%s\n' "${1:-unknown}" ;;
    difficulty)           printf 'difficulty-%s\n' "${1:-unknown}" ;;
    difficulty-asym)      printf 'difficulty-asym-%s-vs-%s\n' "${1:-unknown}" "${2:-unknown}" ;;
    matchup-grid)         printf 'matchup-grid\n' ;;
    huge-map-5clan)       printf 'huge-map-5clan\n' ;;
    ai-quality-baseline*) printf 'baseline\n' ;;  # tier subdirs underneath
    gpu-walltime)         printf 'gpu-walltime\n' ;;
    *)                    printf 'smoke\n' ;;
  esac
}

#######################################
# Succeed only for sub-modes the generated launcher has a case branch for.
# Keep in sync with the `case "${SUBMODE}"` inside the LAUNCHER heredoc.
# Arguments: $1 - sub-mode name
#######################################
launch_submode_supported() {
  case "$1" in
    smoke|clan|difficulty) return 0 ;;
    *) return 1 ;;
  esac
}

if [[ "${MODE}" == "launch" ]]; then
  SUBMODE="${1:?usage: apricot-run.sh launch <smoke|clan|difficulty|...> [args]}"
  shift || true

  # Fail fast on the EDIT host: an unsupported sub-mode would otherwise only
  # be rejected by the launcher after a full fetch + build on the RUN host.
  if ! launch_submode_supported "${SUBMODE}"; then
    echo "[launch] ERROR: submode '${SUBMODE}' is not wired into the async launcher (supported: smoke, clan, difficulty); use the synchronous flow" >&2
    exit 2
  fi

  # Pre-resolve the seed count from the sub-mode args (mirrors the resource
  # policy peek below). The launcher script on apricot writes it to a
  # seeds_total file before invoking autoplay-batch.
  SEEDS_TOTAL_PEEK="$(launch_seeds_total "${SUBMODE}" "$@")"
  # The value is spliced unquoted into the generated script and later echoed
  # verbatim into the status JSON, so it has to be a plain integer.
  if [[ ! "${SEEDS_TOTAL_PEEK}" =~ ^[0-9]+$ ]]; then
    echo "[launch] ERROR: seed count must be a non-negative integer, got '${SEEDS_TOTAL_PEEK}'" >&2
    exit 2
  fi

  # Map sub-mode → submode results dir (mirrors the case statement below).
  SUBDIR="$(launch_results_subdir "${SUBMODE}" "$@")"

  # Build a properly-quoted args string for embedding in the remote command.
  # Iterating "$@" (not an array copy) stays safe under `set -u` with zero
  # args on bash releases older than 4.4.
  ARGS_QUOTED=""
  for a in "$@"; do
    ARGS_QUOTED+=" $(printf '%q' "$a")"
  done

  # All status/log output goes to stderr — stdout is reserved for the bare
  # stamp value so callers can do STAMP=$(scripts/apricot-run.sh launch ...).
  {
    echo "[launch] stamp=${STAMP} submode=${SUBMODE} args=$*"
    echo "[launch] writing launcher.sh to apricot:~/.cache/mc-batches/${STAMP}/"
  } >&2

  # Write a per-stamp launcher script. Heredoc keeps quoting sane; we splice
  # in only the values we actually need ($STAMP, $SUBMODE, $SUBDIR,
  # $BUILD_REF, $SEEDS_TOTAL_PEEK). Everything written as \$… is expanded on
  # apricot when the launcher runs, not here.
  # mkdir + upload + chmod share one ssh so a dropped connection cannot leave
  # an uploaded-but-not-executable launcher behind.
  BUILD_REF_LAUNCH="${BUILD_REF:-origin/main}"
  REMOTE_LAUNCHER="\$HOME/.cache/mc-batches/${STAMP}/launcher.sh"
  ssh "${APRICOT}" "mkdir -p \"\$HOME/.cache/mc-batches/${STAMP}/${SUBDIR}\" && cat > \"${REMOTE_LAUNCHER}\" && chmod +x \"${REMOTE_LAUNCHER}\"" <<LAUNCHER
#!/usr/bin/env bash
# Auto-generated by scripts/apricot-run.sh launch on $(date -u +%Y-%m-%dT%H:%M:%SZ)
# Owned by systemd unit mc-batch-${STAMP}; survives ssh disconnects.
set -euo pipefail
STAMP="${STAMP}"
SUBMODE="${SUBMODE}"
SUBDIR="${SUBDIR}"
BUILD_REF="${BUILD_REF_LAUNCH}"
SEEDS_TOTAL=${SEEDS_TOTAL_PEEK}
CANONICAL="\$HOME/Code/project-buildspace/magic-civilization"
SCRATCH="\$HOME/.cache/mc-src-\${STAMP}"
RESULTS="\$HOME/.cache/mc-batches/\${STAMP}"
RESULTS_SUB="\${RESULTS}/\${SUBDIR}"
mkdir -p "\${RESULTS_SUB}"
echo "\${SEEDS_TOTAL}" > "\${RESULTS_SUB}/seeds_total"
# Launch-side log lives next to results so post-mortem doesn't need journalctl.
LOG="\${RESULTS}/launcher.log"
exec >>"\${LOG}" 2>&1
echo "===== mc-batch-\${STAMP} launcher start \$(date -u +%FT%TZ) ====="
echo "submode=\${SUBMODE} subdir=\${SUBDIR} build_ref=\${BUILD_REF} seeds_total=\${SEEDS_TOTAL}"
# ── Resource policy (formerly EDIT-side; moved here so async re-launches from
# any orchestrator host produce the same answer). ─────────────────────────────
NPROC="\$(nproc 2>/dev/null || echo 8)"
if [[ -n "\${PARALLEL:-}" ]]; then
  PARALLEL_EFFECTIVE="\${PARALLEL}"
elif [[ "\${USE_MAX_CORES:-true}" == "true" ]]; then
  PARALLEL_EFFECTIVE="\$(( SEEDS_TOTAL < NPROC ? SEEDS_TOTAL : NPROC ))"
else
  PARALLEL_EFFECTIVE="\${MIN_CORES:-4}"
fi
[[ "\${PARALLEL_EFFECTIVE}" -lt 1 ]] && PARALLEL_EFFECTIVE=1
export PARALLEL="\${PARALLEL_EFFECTIVE}"
if [[ -z "\${RAYON_NUM_THREADS:-}" ]]; then
  RAYON_NUM_THREADS="\$(( NPROC / PARALLEL_EFFECTIVE ))"
  [[ "\${RAYON_NUM_THREADS}" -lt 1 ]] && RAYON_NUM_THREADS=1
fi
export RAYON_NUM_THREADS
echo "PARALLEL=\${PARALLEL} RAYON_NUM_THREADS=\${RAYON_NUM_THREADS} NPROC=\${NPROC}"
# ── Step 1: fetch + worktree ─────────────────────────────────────────────────
test -d "\${CANONICAL}/.git" || {
  echo "ERROR: canonical checkout missing at \${CANONICAL}" >&2
  exit 1
}
git -C "\${CANONICAL}" fetch origin --quiet
git -C "\${CANONICAL}" worktree add --detach "\${SCRATCH}" "\${BUILD_REF}"
BUILT_SHA="\$(git -C "\${SCRATCH}" rev-parse --short HEAD)"
echo "built_sha=\${BUILT_SHA}"
# ── Step 2: build ────────────────────────────────────────────────────────────
( cd "\${SCRATCH}/src/simulator" && bash build-gdext.sh x86_64-unknown-linux-gnu )
rm -f "\${SCRATCH}/src/game/engine/addons/magic_civ_physics/libmagic_civ_physics.dylib"
# ── Step 3: editor pre-pass to populate .godot/ class cache ─────────────────
flatpak run --user --filesystem=home --command=godot \\
  org.godotengine.Godot --headless --editor --quit \\
  --path "\${SCRATCH}/src/game" 2>&1 | tail -5 || true
# ── Step 4: run the batch ────────────────────────────────────────────────────
GPU_ENV_VAL="\${AI_GPU_ROLLOUT:-false}"
cd "\${SCRATCH}"
case "\${SUBMODE}" in
  smoke)
    AI_USE_MCTS=true AI_GPU_ROLLOUT="\${GPU_ENV_VAL}" PARALLEL="\${PARALLEL}" \\
      bash tools/autoplay-batch.sh "\$@" "\${RESULTS_SUB}"
    ;;
  clan)
    CLAN="\$1"; shift
    AI_USE_MCTS=true AI_PIN_PERSONALITY="\${CLAN}" \\
      AI_GPU_ROLLOUT="\${GPU_ENV_VAL}" PARALLEL="\${PARALLEL}" \\
      bash tools/autoplay-batch.sh "\$@" "\${RESULTS_SUB}"
    ;;
  difficulty)
    TIER="\$1"; shift
    AI_USE_MCTS=true AI_DIFFICULTY="\${TIER}" \\
      AI_DIFFICULTY_P0="\${TIER}" AI_DIFFICULTY_P1="\${TIER}" \\
      AI_GPU_ROLLOUT="\${GPU_ENV_VAL}" PARALLEL="\${PARALLEL}" \\
      bash tools/autoplay-batch.sh "\$@" "\${RESULTS_SUB}"
    ;;
  *)
    echo "ERROR: launcher does not yet support submode '\${SUBMODE}'" >&2
    exit 2
    ;;
esac
# Only on success path: write completion.marker. Status's "failed" state =
# unit inactive AND no marker.
touch "\${RESULTS_SUB}/completion.marker"
echo "===== mc-batch-\${STAMP} launcher OK \$(date -u +%FT%TZ) ====="
# Cleanup worktree (canonical + objects retained for next run).
git -C "\${CANONICAL}" worktree remove --force "\${SCRATCH}" 2>&1 || rm -rf "\${SCRATCH}"
LAUNCHER

  # Start the unit. The sub-mode args are appended as positional args to the
  # launcher script (they show up as $1, $2, ... inside its case branches).
  # NOTE(review): args are forwarded verbatim, so omitted optional seeds/turns
  # are not defaulted here and the results dir becomes the next positional —
  # confirm that matches tools/autoplay-batch.sh's expected signature.
  SYSTEMD_CMD="systemd-run --user --collect --unit=mc-batch-${STAMP} \"${REMOTE_LAUNCHER}\"${ARGS_QUOTED}"
  echo "[launch] starting systemd unit mc-batch-${STAMP}" >&2
  if ! ssh "${APRICOT}" "${SYSTEMD_CMD}" >&2; then
    echo "[launch] FAILED to start systemd unit; check ssh + systemd --user availability" >&2
    exit 1
  fi
  echo "[launch] unit started; tail logs via: ssh ${APRICOT} 'journalctl --user -u mc-batch-${STAMP} -f'" >&2
  # Bare stamp on stdout — this is the contract for callers.
  echo "${STAMP}"
  exit 0
fi
#######################################
# Print the POSIX-sh probe that `status` runs on the RUN host.
# The probe emits exactly one line: IS_ACTIVE|MARKER_COUNT|STATS_COUNT|SEEDS_TOTAL
#   1. systemctl --user is-active <unit>   (state word; "unknown" if no output)
#   2. count of completion.marker files under <stamp>/*/
#   3. count of turn_stats.jsonl files under <stamp>/*/game_*/
#   4. first line of the first <stamp>/*/seeds_total file (0 if absent)
# Arguments: $1 - batch stamp
# Outputs:   the probe script text on stdout
#######################################
apricot_status_probe_script() {
  local stamp="$1"
  # `is-active` prints the state AND exits non-zero for anything but active,
  # so a `|| echo unknown` fallback would append a second line and push the
  # counters off the line the caller parses. Default on empty output instead.
  cat <<PROBE
set +e
BATCH_DIR="\$HOME/.cache/mc-batches/${stamp}"
IS_ACTIVE=\$(systemctl --user is-active "mc-batch-${stamp}" 2>/dev/null)
IS_ACTIVE=\${IS_ACTIVE:-unknown}
MARKER_COUNT=\$(ls "\$BATCH_DIR"/*/completion.marker 2>/dev/null | wc -l | tr -d " ")
STATS_COUNT=\$(ls "\$BATCH_DIR"/*/game_*/turn_stats.jsonl 2>/dev/null | wc -l | tr -d " ")
SEEDS_TOTAL=\$(cat "\$BATCH_DIR"/*/seeds_total 2>/dev/null | head -1)
SEEDS_TOTAL=\${SEEDS_TOTAL:-0}
printf "%s|%s|%s|%s\n" "\$IS_ACTIVE" "\$MARKER_COUNT" "\$STATS_COUNT" "\$SEEDS_TOTAL"
PROBE
}

#######################################
# Turn raw probe output into the one-line status JSON.
# State rules: marker present → complete; else unit active/activating →
# running; else failed. Empty probe output → unreachable.
# Arguments: $1 - batch stamp; $2 - probe output (may be empty)
# Outputs:   one JSON line on stdout
#######################################
apricot_status_json() {
  local stamp="$1" probe_out="${2:-}"
  local is_active marker_count stats_count seeds_total state marker_bool
  if [[ -z "${probe_out}" ]]; then
    printf '{"stamp":"%s","state":"unreachable","seeds_done":0,"seeds_total":0,"completion_marker":false}\n' "${stamp}"
    return 0
  fi
  # Parse only the last line so stray stdout ahead of it (a login banner, or
  # a multi-line unit state) cannot displace the counters.
  probe_out=${probe_out##*$'\n'}
  IFS='|' read -r is_active marker_count stats_count seeds_total <<<"${probe_out}"
  # The counters are emitted as bare JSON numbers; anything non-numeric
  # collapses to 0 so the output stays valid JSON.
  [[ "${marker_count:-}" =~ ^[0-9]+$ ]] || marker_count=0
  [[ "${stats_count:-}" =~ ^[0-9]+$ ]] || stats_count=0
  [[ "${seeds_total:-}" =~ ^[0-9]+$ ]] || seeds_total=0
  if (( marker_count > 0 )); then
    state="complete"
    marker_bool="true"
  elif [[ "${is_active}" == "active" || "${is_active}" == "activating" ]]; then
    state="running"
    marker_bool="false"
  else
    state="failed"
    marker_bool="false"
  fi
  printf '{"stamp":"%s","state":"%s","seeds_done":%s,"seeds_total":%s,"completion_marker":%s}\n' \
    "${stamp}" "${state}" "${stats_count}" "${seeds_total}" "${marker_bool}"
}

if [[ "${MODE}" == "status" ]]; then
  QUERY_STAMP="${1:?usage: apricot-run.sh status <stamp>}"
  # Single ssh probe with short ConnectTimeout. Any ssh failure (timeout,
  # auth, refused) is reported as state=unreachable rather than as an error,
  # so pollers can simply retry.
  PROBE_OUT="$(ssh -o ConnectTimeout=5 -o BatchMode=yes "${APRICOT}" \
    "$(apricot_status_probe_script "${QUERY_STAMP}")" 2>/dev/null)" || PROBE_OUT=""
  apricot_status_json "${QUERY_STAMP}" "${PROBE_OUT}"
  exit 0
fi
#######################################
# Extract the "state" value from a one-line status JSON document.
# Arguments: $1 - status JSON as printed by `apricot-run.sh status`
# Outputs:   the state string, or nothing when no "state" field is present
#######################################
fetch_status_state() {
  sed -n 's/.*"state":"\([^"]*\)".*/\1/p' <<<"$1"
}

if [[ "${MODE}" == "fetch" ]]; then
  QUERY_STAMP="${1:?usage: apricot-run.sh fetch <stamp>}"
  LOCAL_DEST="${PROJECT_DIR}/.local/iter/${QUERY_STAMP}"

  # Re-use status to gate the fetch — this is the same one-line probe.
  # Re-enter through `bash "${BASH_SOURCE[0]}"` rather than "$0" so the gate
  # also works when the script was started as `bash scripts/apricot-run.sh`
  # and does not carry the executable bit.
  STATUS_JSON="$(bash "${BASH_SOURCE[0]}" status "${QUERY_STAMP}")"
  STATE="$(fetch_status_state "${STATUS_JSON}")"
  if [[ "${STATE}" != "complete" ]]; then
    echo "[fetch] batch not complete; status=${STATE}" >&2
    echo "[fetch] full status: ${STATUS_JSON}" >&2
    exit 1
  fi

  mkdir -p "${LOCAL_DEST}"
  echo "[fetch] rsync apricot:~/.cache/mc-batches/${QUERY_STAMP}/ → ${LOCAL_DEST}/" >&2
  # --partial keeps interrupted transfers so a re-run resumes after a drop.
  rsync -a --partial \
    "${APRICOT}:.cache/mc-batches/${QUERY_STAMP}/" \
    "${LOCAL_DEST}/"
  # Bare destination path on stdout — callers capture it with $(…).
  echo "${LOCAL_DEST}"
  exit 0
fi
# ── end p2-64 async protocol ─────────────────────────────────────────────────
# ── Resource policy for PARALLEL + RAYON_NUM_THREADS ─────────────────
# Each Godot instance spawns its own rayon thread pool for MCTS rollouts;
# rayon defaults to nproc unless RAYON_NUM_THREADS is set. If PARALLEL

View file

@ -30,3 +30,22 @@ ssh apricot 'cd ~/.cache/mc-src-<stamp> && timeout 60 flatpak run --user --files
```
Subsequent runs (autoplay, GUT, batches) will then load extensions and class_name registrations correctly.
## Async batch protocol on apricot (p2-64)
When apricot connectivity is intermittent (sleep/wake, sshd channel saturation, network blips), use the launch / status / fetch loop instead of the synchronous `scripts/apricot-run.sh smoke …` flow. The systemd `--user` unit on apricot owns the build+batch lifecycle and survives ssh disconnects. Status probes use a single short-timeout ssh and never stream result-file contents — only `is-active` / `ls | wc -l` style checks, plus a one-line read of the small `seeds_total` file.
```
STAMP=$(scripts/apricot-run.sh launch smoke 10 300) # bare stdout = stamp
while STATE=$(scripts/apricot-run.sh status "$STAMP" | jq -r .state); \
[[ $STATE != complete ]]; do
[[ $STATE == failed ]] && { echo "batch failed; journalctl --user -u mc-batch-$STAMP" >&2; exit 1; }
[[ $STATE == unreachable ]] && sleep 30 && continue
sleep 60
done
LOCAL=$(scripts/apricot-run.sh fetch "$STAMP") # rsync to .local/iter/<stamp>/
```
States: `running` (unit active), `complete` (`completion.marker` present), `failed` (unit inactive + no marker), `unreachable` (ssh probe timeout — retryable, no work lost).
Submodes currently wired into the launcher: `smoke`, `clan`, `difficulty`. Other modes (`gpu-walltime`, `matchup-grid`, `huge-map-5clan`, `ai-quality-baseline*`) still run via the synchronous flow and can be added to the launcher case-branch as needed.