magicciv/scripts/apricot-run.sh

716 lines
36 KiB
Bash
Executable file
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
# apricot-run.sh — Isolated build + batch pipeline on apricot, sourced from forge.
#
# Source-of-truth flow: forge → apricot canonical checkout → per-run worktree.
# Plum is NOT in this loop. Agents push their work via ACS (commits-tray on plum
# pushes every ~5 min), then this script builds the latest origin/main on apricot.
#
# 1. Fetch latest origin/main into the canonical checkout
# (~/Code/project-buildspace/magic-civilization, hard-fenced via pre-commit hook).
# 2. `git worktree add` a per-run scratch tree at ~/.cache/mc-src-<stamp>/
# (shared object pool with canonical, isolated working tree, fast + reproducible).
# 3. Build (cargo) in the worktree, target dir stays there (ephemeral).
# 4. Run the batch with RESULTS_DIR under ~/.cache/mc-batches/<stamp>/
# (persistent, XDG cache convention, flatpak-visible via --filesystem=home).
# 5. Fetch verdict JSON back to EDIT host for review.
# 6. Remove the worktree (canonical + objects retained for next run).
#
# ── Synchronous usage (block until done, fetch results inline) ──────────────
# scripts/apricot-run.sh smoke [seeds=10] [turns=300]
# scripts/apricot-run.sh clan <clan_id> [seeds=10] [turns=300]
# scripts/apricot-run.sh gpu-walltime [seeds=10] [turns=300]
#
# ── Async protocol (p2-64): launch / status / fetch ─────────────────────────
# Decouples job lifecycle from the orchestrating ssh. The systemd --user unit
# on apricot owns build+batch and survives ssh disconnect, sleep/wake, network
# blips. EDIT host polls via short ConnectTimeout=5 probes.
#
# STAMP=$(scripts/apricot-run.sh launch smoke 1 50) # bare stdout = stamp
# scripts/apricot-run.sh status "$STAMP" # one-line JSON
# # → {"stamp":"...","state":"running|complete|failed|unreachable",
# # "seeds_done":N,"seeds_total":M,"completion_marker":bool}
# scripts/apricot-run.sh fetch "$STAMP" # rsync results
#
# Loop pattern:
# STAMP=$(scripts/apricot-run.sh launch smoke 10 300)
# while STATE=$(scripts/apricot-run.sh status "$STAMP" | jq -r .state); \
# [[ $STATE != complete ]]; do
# [[ $STATE == failed ]] && exit 1
# sleep 60
# done
# scripts/apricot-run.sh fetch "$STAMP"
#
# Implementation notes:
# • launch writes a per-stamp launcher.sh into ~/.cache/mc-batches/<stamp>/
# and starts it under `systemd-run --user --collect --unit=mc-batch-<stamp>`.
# • The launcher does git fetch, worktree add, a docker build of the image for
# that SHA, runs the batch inside the container, then
# `touch <submode>/completion.marker` ONLY on success.
# • status uses a single ssh ConnectTimeout=5 with four lightweight probes
# (systemctl is-active, marker count, turn_stats count, live godot process
# count) plus a one-line seeds_total read — no piped reads of bulk file
# contents (we hit channel saturation with that historically).
# • fetch is rsync -a --partial; resumable across drops.
#
# Environment:
# APRICOT_SSH_ALIAS — ssh alias for the RUN host (default: apricot).
# STAMP — override the timestamp (for reproducing a specific run).
# BUILD_REF — git ref to build (default: origin/main). Lets you reproduce
# a prior run by SHA without changing your local plum tree.
set -euo pipefail

# Repo layout: this script lives in <project>/scripts/, so the project root is
# the parent of the script's own directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
APRICOT="${APRICOT_SSH_ALIAS:-apricot}"
STAMP="${STAMP:-$(date +%Y%m%d_%H%M%S)}"

# ── Load .env / .env.local so USE_MAX_CORES + MIN_CORES + AI_GPU_ROLLOUT
# propagate into the resource policy below. .env.local wins over .env
# because it is sourced second; `set -a` exports everything the files assign.
for envfile in "${PROJECT_DIR}/.env" "${PROJECT_DIR}/.env.local"; do
  if [[ -f "${envfile}" ]]; then
    set -a; source "${envfile}"; set +a
  fi
done

# MODE + positional args resolved early so the resource-policy block can
# peek at the seed count (which differs per mode — for `clan` it's $2
# because $1 is the clan_id; for smoke/gpu-walltime it's $1).
# The usage text lists every mode the dispatchers below actually accept.
MODE="${1:?usage: apricot-run.sh <smoke|clan|clan-priors|difficulty|difficulty-asym|gpu-walltime|matchup-grid|huge-map-5clan|ai-quality-baseline[-pre-c|-post-c]|launch|status|fetch> [args]}"
shift || true
# ── p2-64 async protocol: launch / status / fetch ────────────────────────────
# These three sub-modes decouple the batch lifecycle from the orchestrating ssh.
# See the header comment for the full protocol shape and example loop.

#######################################
# Print the seed count a launch sub-mode will run, using the same positional
# layout and defaults as the synchronous resource-policy peek.
# Arguments: $1 - sub-mode; $2… - the sub-mode's own arguments
# Outputs:   seed count (or its default) on stdout
#######################################
_apricot_launch_seeds_total() {
  local submode="${1:-}"
  shift || true
  case "${submode}" in
    clan|clan-priors|difficulty) printf '%s\n' "${2:-10}" ;;  # $1 is the id
    difficulty-asym)             printf '%s\n' "${3:-10}" ;;  # $1/$2 are tiers
    matchup-grid|huge-map-5clan) printf '%s\n' "${1:-5}" ;;
    ai-quality-baseline*)        printf '%s\n' "${1:-50}" ;;
    *)                           printf '%s\n' "${1:-10}" ;;  # smoke, gpu-walltime
  esac
}

#######################################
# Print the results sub-directory name for a launch sub-mode (mirrors the
# directory names used by the synchronous modes).
# Arguments: $1 - sub-mode; $2… - the sub-mode's own arguments
# Outputs:   directory name on stdout
#######################################
_apricot_launch_subdir() {
  local submode="${1:-}"
  shift || true
  case "${submode}" in
    clan)                 printf 'clan-%s\n' "${1:-unknown}" ;;
    clan-priors)          printf 'clan-priors-%s\n' "${1:-unknown}" ;;
    difficulty)           printf 'difficulty-%s\n' "${1:-unknown}" ;;
    difficulty-asym)      printf 'difficulty-asym-%s-vs-%s\n' "${1:-unknown}" "${2:-unknown}" ;;
    matchup-grid)         printf 'matchup-grid\n' ;;
    huge-map-5clan)       printf 'huge-map-5clan\n' ;;
    ai-quality-baseline*) printf 'baseline\n' ;;  # tier subdirs underneath
    gpu-walltime)         printf 'gpu-walltime\n' ;;
    *)                    printf 'smoke\n' ;;
  esac
}

if [[ "${MODE:-}" == "launch" ]]; then
  SUBMODE="${1:?usage: apricot-run.sh launch <smoke|clan|difficulty|...> [args]}"
  shift || true
  LAUNCH_ARGS=("$@")

  # Fail fast, before any ssh: the generated launcher only implements these
  # sub-modes, and clan/difficulty need their id. Previously both problems
  # surfaced only after the remote fetch + docker build had already run.
  case "${SUBMODE}" in
    smoke|huge-map-5clan) ;;
    clan|difficulty)
      if [[ -z "${LAUNCH_ARGS[0]:-}" ]]; then
        echo "[launch] ERROR: usage: apricot-run.sh launch ${SUBMODE} <id> [seeds] [turns]" >&2
        exit 2
      fi
      ;;
    *)
      echo "[launch] ERROR: launcher does not yet support submode '${SUBMODE}'" >&2
      exit 2
      ;;
  esac

  # The ${arr[@]+…} form keeps `set -u` happy on bash < 4.4 when no args given.
  SEEDS_TOTAL_PEEK="$(_apricot_launch_seeds_total "${SUBMODE}" ${LAUNCH_ARGS[@]+"${LAUNCH_ARGS[@]}"})"
  SUBDIR="$(_apricot_launch_subdir "${SUBMODE}" ${LAUNCH_ARGS[@]+"${LAUNCH_ARGS[@]}"})"

  # These values are spliced into remote shell text and a systemd unit name,
  # so restrict them to a safe alphabet instead of trusting the caller.
  _safe_token_re='^[A-Za-z0-9._-]+$'
  if [[ ! "${SEEDS_TOTAL_PEEK}" =~ ^[0-9]+$ ]]; then
    echo "[launch] ERROR: seed count must be a non-negative integer, got '${SEEDS_TOTAL_PEEK}'" >&2
    exit 2
  fi
  if [[ ! "${STAMP}" =~ ${_safe_token_re} || ! "${SUBDIR}" =~ ${_safe_token_re} ]]; then
    echo "[launch] ERROR: stamp/id may only contain [A-Za-z0-9._-] (stamp='${STAMP}' subdir='${SUBDIR}')" >&2
    exit 2
  fi

  # Build a properly-quoted args string for embedding in the systemd-run line.
  ARGS_QUOTED=""
  for a in ${LAUNCH_ARGS[@]+"${LAUNCH_ARGS[@]}"}; do
    ARGS_QUOTED+=" $(printf '%q' "$a")"
  done

  # All status/log output goes to stderr — stdout is reserved for the bare
  # stamp value so callers can do STAMP=$(scripts/apricot-run.sh launch ...).
  {
    echo "[launch] stamp=${STAMP} submode=${SUBMODE} args=${LAUNCH_ARGS[*]-}"
    echo "[launch] writing launcher.sh to apricot:~/.cache/mc-batches/${STAMP}/"
  } >&2

  # Write a per-stamp launcher script. The heredoc is unquoted on purpose:
  # unescaped ${…} is expanded HERE (on the EDIT host) and baked in, while
  # \${…} survives to be expanded when the launcher runs on apricot.
  # The body stays at column 0 so the generated file starts with its shebang.
  BUILD_REF_LAUNCH="${BUILD_REF:-origin/main}"
  ssh "${APRICOT}" "mkdir -p \"\$HOME/.cache/mc-batches/${STAMP}/${SUBDIR}\" && cat > \"\$HOME/.cache/mc-batches/${STAMP}/launcher.sh\"" <<LAUNCHER
#!/usr/bin/env bash
# Auto-generated by scripts/apricot-run.sh launch on $(date -u +%Y-%m-%dT%H:%M:%SZ)
# Owned by systemd unit mc-batch-${STAMP}; survives ssh disconnects.
set -euo pipefail
STAMP="${STAMP}"
SUBMODE="${SUBMODE}"
SUBDIR="${SUBDIR}"
BUILD_REF=$(printf '%q' "${BUILD_REF_LAUNCH}")
SEEDS_TOTAL=${SEEDS_TOTAL_PEEK}
CANONICAL="\$HOME/Code/project-buildspace/magic-civilization"
SCRATCH="\$HOME/.cache/mc-src-\${STAMP}"
RESULTS="\$HOME/.cache/mc-batches/\${STAMP}"
RESULTS_SUB="\${RESULTS}/\${SUBDIR}"
mkdir -p "\${RESULTS_SUB}"
echo "\${SEEDS_TOTAL}" > "\${RESULTS_SUB}/seeds_total"
# Launch-side log lives next to results so post-mortem doesn't need journalctl.
LOG="\${RESULTS}/launcher.log"
exec >>"\${LOG}" 2>&1
echo "===== mc-batch-\${STAMP} launcher start \$(date -u +%FT%TZ) ====="
echo "submode=\${SUBMODE} subdir=\${SUBDIR} build_ref=\${BUILD_REF} seeds_total=\${SEEDS_TOTAL}"
# ── Resource policy (formerly EDIT-side; moved here so async re-launches from
# any orchestrator host produce the same answer). ─────────────────────────────
NPROC="\$(nproc 2>/dev/null || echo 8)"
if [[ -n "\${PARALLEL:-}" ]]; then
  PARALLEL_EFFECTIVE="\${PARALLEL}"
elif [[ "\${USE_MAX_CORES:-true}" == "true" ]]; then
  PARALLEL_EFFECTIVE="\$(( SEEDS_TOTAL < NPROC ? SEEDS_TOTAL : NPROC ))"
else
  PARALLEL_EFFECTIVE="\${MIN_CORES:-4}"
fi
[[ "\${PARALLEL_EFFECTIVE}" -lt 1 ]] && PARALLEL_EFFECTIVE=1
export PARALLEL="\${PARALLEL_EFFECTIVE}"
if [[ -z "\${RAYON_NUM_THREADS:-}" ]]; then
  RAYON_NUM_THREADS="\$(( NPROC / PARALLEL_EFFECTIVE ))"
  [[ "\${RAYON_NUM_THREADS}" -lt 1 ]] && RAYON_NUM_THREADS=1
fi
export RAYON_NUM_THREADS
echo "PARALLEL=\${PARALLEL} RAYON_NUM_THREADS=\${RAYON_NUM_THREADS} NPROC=\${NPROC}"
# ── Step 1: fetch + worktree ─────────────────────────────────────────────────
test -d "\${CANONICAL}/.git" || {
  echo "ERROR: canonical checkout missing at \${CANONICAL}" >&2
  exit 1
}
git -C "\${CANONICAL}" fetch origin --quiet
git -C "\${CANONICAL}" worktree add --detach "\${SCRATCH}" "\${BUILD_REF}"
BUILT_SHA="\$(git -C "\${SCRATCH}" rev-parse --short HEAD)"
echo "built_sha=\${BUILT_SHA}"
# ── Step 2: docker build (bakes libmagic_civ_physics.x86_64.so per SHA) ──────
# The image bakes the simulator .so for this exact SHA. mc-ai-docker.sh then
# bind-mounts the worktree at /work, and the entrypoint installs the baked
# .so over whatever the worktree carries — guaranteeing Godot inside the
# container loads the SHA-matched artifact and never a stale one.
#
# Docker daemon must be running on this host. We fail loudly if it's not
# (set -e aborts on the failed build) — no fallback to bare flatpak/cargo,
# per the brief's anti-stopgap rule.
IMAGE_TAG="mc-ai:\${BUILT_SHA}"
DOCKER_BUILDKIT=1 docker build \\
  --tag "\${IMAGE_TAG}" \\
  --file "\${SCRATCH}/tools/docker/Dockerfile.mc-ai" \\
  "\${SCRATCH}"
# Trap → docker kill any container we started, so an OOM-killed or
# externally-stopped launcher doesn't leave the daemon-owned container
# running past the systemd unit's lifetime.
CONTAINER_NAME="mc-batch-\${STAMP}-run"
trap 'docker kill "\${CONTAINER_NAME}" 2>/dev/null || true' EXIT
# Container cgroup ceilings. Scaled with PARALLEL so an N-seed batch gets
# N cores + headroom; explicit env overrides win.
MC_AI_CPUS_DEFAULT="\${PARALLEL}"
MC_AI_MEMORY_GB_DEFAULT="\$(( PARALLEL * 3 + 2 ))"
[[ "\${MC_AI_MEMORY_GB_DEFAULT}" -lt 4 ]] && MC_AI_MEMORY_GB_DEFAULT=4
export MC_AI_IMAGE_TAG="\${IMAGE_TAG}"
export MC_AI_WORKTREE="\${SCRATCH}"
export MC_AI_OUTPUT_DIR="\${RESULTS_SUB}"
export MC_AI_CPUS="\${MC_AI_CPUS:-\${MC_AI_CPUS_DEFAULT}}"
export MC_AI_MEMORY="\${MC_AI_MEMORY:-\${MC_AI_MEMORY_GB_DEFAULT}g}"
export MC_AI_PIDS="\${MC_AI_PIDS:-1024}"
export MC_AI_CONTAINER_NAME="\${CONTAINER_NAME}"
run_in_container() {
  # Caller passes a sequence of KEY=VAL ... cmd args…; we route the KVs
  # through MC_AI_EXTRA_ENV so they land as docker -e flags.
  local _envs=()
  while [[ \$# -gt 0 && "\$1" == *=* ]]; do
    _envs+=("\$1"); shift
  done
  # The trailing "-" default keeps set -u from tripping on an empty array
  # (bash < 4.4) when a caller passes no KEY=VAL pairs.
  MC_AI_EXTRA_ENV="\${_envs[*]-}" bash "\${SCRATCH}/scripts/mc-ai-docker.sh" "\$@"
}
# ── Step 3: editor pre-pass to populate .godot/ class cache (in container) ──
run_in_container godot --headless --editor --quit --path /work/src/game 2>&1 | tail -5 || true
# ── Step 4: run the batch (in container) ────────────────────────────────────
GPU_ENV_VAL="\${AI_GPU_ROLLOUT:-false}"
# Inside the container the batch writes to /work/.local/out (bind-mounted to
# RESULTS_SUB on the host). autoplay-batch.sh is called as
# <seeds> <turns> <results_dir>, so seeds/turns are defaulted explicitly;
# otherwise an omitted optional arg would shift the results dir into the
# seeds or turns slot. huge-map-5clan.sh takes its dir via HUGE_OUTPUT.
CONTAINER_OUT="/work/.local/out"
case "\${SUBMODE}" in
  smoke)
    SEEDS="\${1:-10}"; TURNS="\${2:-300}"
    run_in_container \\
      AUTOPLAY_GODOT_BIN=godot \\
      AI_USE_MCTS=true \\
      AI_GPU_ROLLOUT="\${GPU_ENV_VAL}" \\
      PARALLEL="\${PARALLEL}" \\
      RAYON_NUM_THREADS="\${RAYON_NUM_THREADS}" \\
      bash tools/autoplay-batch.sh "\${SEEDS}" "\${TURNS}" "\${CONTAINER_OUT}"
    ;;
  clan)
    CLAN="\${1:?clan id required}"
    SEEDS="\${2:-10}"; TURNS="\${3:-300}"
    run_in_container \\
      AUTOPLAY_GODOT_BIN=godot \\
      AI_USE_MCTS=true \\
      AI_PIN_PERSONALITY="\${CLAN}" \\
      AI_GPU_ROLLOUT="\${GPU_ENV_VAL}" \\
      PARALLEL="\${PARALLEL}" \\
      RAYON_NUM_THREADS="\${RAYON_NUM_THREADS}" \\
      bash tools/autoplay-batch.sh "\${SEEDS}" "\${TURNS}" "\${CONTAINER_OUT}"
    ;;
  difficulty)
    TIER="\${1:?difficulty tier required}"
    SEEDS="\${2:-10}"; TURNS="\${3:-300}"
    run_in_container \\
      AUTOPLAY_GODOT_BIN=godot \\
      AI_USE_MCTS=true \\
      AI_DIFFICULTY="\${TIER}" \\
      AI_DIFFICULTY_P0="\${TIER}" \\
      AI_DIFFICULTY_P1="\${TIER}" \\
      AI_GPU_ROLLOUT="\${GPU_ENV_VAL}" \\
      PARALLEL="\${PARALLEL}" \\
      RAYON_NUM_THREADS="\${RAYON_NUM_THREADS}" \\
      bash tools/autoplay-batch.sh "\${SEEDS}" "\${TURNS}" "\${CONTAINER_OUT}"
    ;;
  huge-map-5clan)
    COUNT="\${1:-5}"; TURN_LIMIT="\${2:-300}"
    run_in_container \\
      AUTOPLAY_GODOT_BIN=godot \\
      AI_USE_MCTS=true \\
      PARALLEL="\${PARALLEL}" \\
      RAYON_NUM_THREADS="\${RAYON_NUM_THREADS}" \\
      COUNT="\${COUNT}" \\
      TURN_LIMIT="\${TURN_LIMIT}" \\
      HUGE_OUTPUT="\${CONTAINER_OUT}" \\
      bash tools/huge-map-5clan.sh
    ;;
  *)
    echo "ERROR: launcher does not yet support submode '\${SUBMODE}'" >&2
    exit 2
    ;;
esac
# Only on success path: write completion.marker. Status's "failed" state =
# unit inactive AND no marker.
touch "\${RESULTS_SUB}/completion.marker"
echo "===== mc-batch-\${STAMP} launcher OK \$(date -u +%FT%TZ) ====="
# Cleanup worktree (canonical + objects retained for next run).
git -C "\${CANONICAL}" worktree remove --force "\${SCRATCH}" 2>&1 || rm -rf "\${SCRATCH}"
LAUNCHER

  ssh "${APRICOT}" "chmod +x \"\$HOME/.cache/mc-batches/${STAMP}/launcher.sh\""

  # Start the unit. LAUNCH_ARGS are passed as positional args to the launcher
  # script (they show up as $1, $2, ... inside its case branches).
  SYSTEMD_CMD="systemd-run --user --collect --unit=mc-batch-${STAMP} \"\$HOME/.cache/mc-batches/${STAMP}/launcher.sh\"${ARGS_QUOTED}"
  echo "[launch] starting systemd unit mc-batch-${STAMP}" >&2
  if ! ssh "${APRICOT}" "${SYSTEMD_CMD}" >&2; then
    echo "[launch] FAILED to start systemd unit; check ssh + systemd --user availability" >&2
    exit 1
  fi
  echo "[launch] unit started; tail logs via: ssh ${APRICOT} 'journalctl --user -u mc-batch-${STAMP} -f'" >&2

  # Bare stamp on stdout — this is the contract for callers.
  echo "${STAMP}"
  exit 0
fi
#######################################
# Clamp one probe field to a non-negative decimal integer.
# Arguments: $1 - raw field (may be empty, multi-line, or garbage)
# Outputs:   the integer in base 10, or 0 if $1 is not all digits
#######################################
_apricot_uint() {
  if [[ "${1:-}" =~ ^[0-9]+$ ]]; then
    # 10# forces base 10 so a value like "08" is not parsed as bad octal.
    printf '%s\n' "$((10#$1))"
  else
    printf '0\n'
  fi
}

#######################################
# Map the probe counters to the public batch state.
# Arguments: $1 - completion.marker count
#            $2 - live godot process count
#            $3 - systemctl is-active word for the unit
# Outputs:   "<state>|<completion_marker bool>" on stdout
#######################################
_apricot_status_classify() {
  local marker_count="$1" godot_procs="$2" is_active="$3"
  if (( marker_count > 0 )); then
    if (( godot_procs > 0 )); then
      # Batch script returned (touched completion.marker) but detached godot
      # processes are still playing games. Stay `running` so `fetch` won't
      # pull mid-run turn_stats snapshots.
      printf 'running|true\n'
    else
      printf 'complete|true\n'
    fi
  elif [[ "${is_active}" == "active" || "${is_active}" == "activating" ]]; then
    printf 'running|false\n'
  else
    # Unit is gone and it never wrote a marker.
    printf 'failed|false\n'
  fi
}

if [[ "${MODE:-}" == "status" ]]; then
  QUERY_STAMP="${1:?usage: apricot-run.sh status <stamp>}"
  # The stamp is spliced into remote shell text; only accept a safe alphabet.
  if [[ ! "${QUERY_STAMP}" =~ ^[A-Za-z0-9._-]+$ ]]; then
    echo "ERROR: invalid stamp '${QUERY_STAMP}' (allowed: [A-Za-z0-9._-])" >&2
    exit 2
  fi
  UNIT="mc-batch-${QUERY_STAMP}"
  # Single ssh probe with short ConnectTimeout. Four lightweight queries:
  #   1. systemctl --user is-active <unit>
  #   2. count of completion.marker files under <stamp>/*/
  #   3. count of turn_stats.jsonl files under <stamp>/*/game_*/
  #   4. count of live godot processes for THIS batch stamp
  # plus seeds_total from the first submode dir if present.
  #
  # The godot-proc count is load-bearing: `flatpak run` detaches into a
  # systemd user scope, so autoplay-batch.sh's `wait` returns and
  # completion.marker is touched while the actual godot processes are still
  # running headless games.
  #
  # Two traps this probe avoids:
  #   • `$(cmd || echo default)` yields TWO lines when cmd prints and then
  #     exits non-zero (is-active on an inactive unit, grep -c with zero
  #     matches). That splits the reply so the local `read` sees only the
  #     first field. Defaults are applied with ${var:-…} instead.
  #   • pgrep -f matches full command lines, including the shell running this
  #     very probe. The bracket form [g]odot keeps that word from appearing
  #     literally in the probe text, so the probe cannot match itself.
  PROBE='set +e
    D="$HOME/.cache/mc-batches/'"${QUERY_STAMP}"'"
    IS_ACTIVE=$(systemctl --user is-active '"${UNIT}"' 2>/dev/null | head -n 1)
    MARKER_COUNT=$(ls "$D"/*/completion.marker 2>/dev/null | wc -l | tr -d " ")
    STATS_COUNT=$(ls "$D"/*/game_*/turn_stats.jsonl 2>/dev/null | wc -l | tr -d " ")
    PROC_COUNT=$(pgrep -af "[g]odot.*'"${QUERY_STAMP}"'" 2>/dev/null | grep -c "[g]odot --path")
    SEEDS_TOTAL=$(cat "$D"/*/seeds_total 2>/dev/null | head -n 1)
    printf "%s|%s|%s|%s|%s\n" "${IS_ACTIVE:-unknown}" "${MARKER_COUNT:-0}" "${STATS_COUNT:-0}" "${SEEDS_TOTAL:-0}" "${PROC_COUNT:-0}"'
  PROBE_OUT="$(ssh -o ConnectTimeout=5 -o BatchMode=yes "${APRICOT}" "${PROBE}" 2>/dev/null)" || PROBE_OUT=""
  if [[ -z "${PROBE_OUT}" ]]; then
    # Same key set as the reachable case so consumers can parse one schema.
    printf '{"stamp":"%s","state":"unreachable","seeds_done":0,"seeds_total":0,"completion_marker":false,"godot_procs":0}\n' "${QUERY_STAMP}"
    exit 0
  fi

  IFS='|' read -r IS_ACTIVE MARKER_COUNT STATS_COUNT SEEDS_TOTAL GODOT_PROCS <<<"${PROBE_OUT}"
  # Every numeric field is emitted unquoted into JSON, so force real integers.
  MARKER_COUNT="$(_apricot_uint "${MARKER_COUNT:-}")"
  STATS_COUNT="$(_apricot_uint "${STATS_COUNT:-}")"
  SEEDS_TOTAL="$(_apricot_uint "${SEEDS_TOTAL:-}")"
  GODOT_PROCS="$(_apricot_uint "${GODOT_PROCS:-}")"

  IFS='|' read -r STATE MARKER_BOOL \
    <<<"$(_apricot_status_classify "${MARKER_COUNT}" "${GODOT_PROCS}" "${IS_ACTIVE:-unknown}")"

  printf '{"stamp":"%s","state":"%s","seeds_done":%s,"seeds_total":%s,"completion_marker":%s,"godot_procs":%s}\n' \
    "${QUERY_STAMP}" "${STATE}" "${STATS_COUNT}" "${SEEDS_TOTAL}" "${MARKER_BOOL}" "${GODOT_PROCS}"
  exit 0
fi
# fetch <stamp>: copy a finished batch's results dir from the RUN host into
# .local/iter/<stamp>/ and print that local path on stdout. Refuses (exit 1)
# unless `status` reports the batch as complete.
if [[ "${MODE}" == "fetch" ]]; then
  QUERY_STAMP="${1:?usage: apricot-run.sh fetch <stamp>}"
  LOCAL_DEST="${PROJECT_DIR}/.local/iter/${QUERY_STAMP}"

  # Gate on the same one-line probe that `status` exposes; pull the state
  # field out of its JSON with sed so no JSON tool is needed on the EDIT host.
  STATUS_JSON="$("$0" status "${QUERY_STAMP}")"
  STATE="$(sed -n 's/.*"state":"\([^"]*\)".*/\1/p' <<<"${STATUS_JSON}")"

  if [[ "${STATE}" == "complete" ]]; then
    mkdir -p "${LOCAL_DEST}"
    echo "[fetch] rsync apricot:~/.cache/mc-batches/${QUERY_STAMP}/ → ${LOCAL_DEST}/" >&2
    # -a --partial so an interrupted transfer can be resumed by re-running.
    rsync -a --partial \
      "${APRICOT}:.cache/mc-batches/${QUERY_STAMP}/" \
      "${LOCAL_DEST}/"
    echo "${LOCAL_DEST}"
    exit 0
  fi

  echo "[fetch] batch not complete; status=${STATE}" >&2
  echo "[fetch] full status: ${STATUS_JSON}" >&2
  exit 1
fi
# ── end p2-64 async protocol ─────────────────────────────────────────────────
# ── Resource policy for PARALLEL + RAYON_NUM_THREADS ─────────────────
# Every Godot instance brings its own rayon pool for MCTS rollouts, and rayon
# sizes that pool to nproc unless RAYON_NUM_THREADS says otherwise. PARALLEL
# instances at nproc threads each means PARALLEL*nproc threads contending for
# nproc cores. The policy here: one instance per seed, and each instance gets
# nproc / PARALLEL rayon threads so the box is filled evenly.

# Where the seed count sits in "$@" depends on how many id arguments the mode
# takes in front of it.
case "${MODE}" in
  clan|clan-priors|difficulty)
    _seed_count_peek="${2:-10}" ;;   # $1 is clan_id / tier, $2 is seeds
  difficulty-asym)
    _seed_count_peek="${3:-10}" ;;   # $1 p0 tier, $2 p1 tier, $3 seeds
  matchup-grid|huge-map-5clan)
    _seed_count_peek="${1:-5}" ;;    # $1 is seeds (per pair for matchup-grid)
  ai-quality-baseline*)
    _seed_count_peek="${1:-50}" ;;   # $1 seeds-per-tier
  *)
    _seed_count_peek="${1:-10}" ;;   # smoke, gpu-walltime
esac

# Core count of the RUN host; falls back to 8 if the ssh probe fails.
NPROC="$(ssh "${APRICOT}" nproc 2>/dev/null || echo 8)"

# Precedence: explicit PARALLEL env > USE_MAX_CORES=true > MIN_CORES (default 4).
PARALLEL_EFFECTIVE="${MIN_CORES:-4}"
PARALLEL_SOURCE="MIN_CORES default"
if [[ -n "${PARALLEL:-}" ]]; then
  PARALLEL_EFFECTIVE="${PARALLEL}"
  PARALLEL_SOURCE="env override"
elif [[ "${USE_MAX_CORES:-false}" == "true" ]]; then
  # One instance per seed, capped at NPROC (extra instances would only queue).
  PARALLEL_EFFECTIVE="$(( _seed_count_peek < NPROC ? _seed_count_peek : NPROC ))"
  PARALLEL_SOURCE="USE_MAX_CORES=true → min(seeds=${_seed_count_peek}, nproc=${NPROC})"
fi
export PARALLEL="${PARALLEL_EFFECTIVE}"

# Per-instance rayon threads: an explicit env value wins, otherwise the fair
# share of cores, never below 1.
RAYON_SOURCE="env override"
if [[ -z "${RAYON_NUM_THREADS:-}" ]]; then
  RAYON_NUM_THREADS=1
  if [[ "${PARALLEL_EFFECTIVE}" -gt 0 ]]; then
    RAYON_NUM_THREADS="$(( NPROC / PARALLEL_EFFECTIVE ))"
  fi
  if [[ "${RAYON_NUM_THREADS}" -lt 1 ]]; then
    RAYON_NUM_THREADS=1
  fi
  RAYON_SOURCE="nproc(${NPROC}) / PARALLEL(${PARALLEL_EFFECTIVE})"
fi
export RAYON_NUM_THREADS
# Source + build scratch lives under $HOME/.cache (flatpak-visible via
# --filesystem=home). /tmp was tried first but flatpak's sandbox can't see
# /tmp, so Godot rejected the --path argument with "Invalid project path".
# $HOME/.cache/ also satisfies the apricot-isolation rule (not under ~/Code/@projects,
# not under project-buildspace where the canonical checkout lives) and is
# convention-cleanable.
SCRATCH="\$HOME/.cache/mc-src-${STAMP}" # expanded on apricot
RESULTS="\$HOME/.cache/mc-batches/${STAMP}" # expanded on apricot

# Resolve $HOME on apricot once and derive every absolute path from it (one
# ssh round-trip instead of three). An empty answer would turn the paths
# below into root-relative ones, so refuse to continue in that case.
REMOTE_HOME="$(ssh "${APRICOT}" "echo \$HOME")"
if [[ -z "${REMOTE_HOME}" ]]; then
  echo "ERROR: could not resolve \$HOME on ${APRICOT}" >&2
  exit 1
fi
SCRATCH_ABS="${REMOTE_HOME}/.cache/mc-src-${STAMP}"
RESULTS_ABS="${REMOTE_HOME}/.cache/mc-batches/${STAMP}"
# Canonical checkout: persistent clone of magicciv from forge, lives in the
# autocommit-excluded buildspace dir. Worktrees branch off this for per-run
# scratch trees with shared object pool — fast + reproducible.
CANONICAL_ABS="${REMOTE_HOME}/Code/project-buildspace/magic-civilization"
BUILD_REF="${BUILD_REF:-origin/main}"

echo "============================================================"
echo "apricot-run.sh mode=${MODE} stamp=${STAMP}"
echo " EDIT host: $(hostname)"
echo " RUN host: ${APRICOT}"
echo " CANONICAL: ${CANONICAL_ABS} (persistent clone of forge)"
echo " BUILD_REF: ${BUILD_REF}"
echo " SCRATCH: ${SCRATCH_ABS} (per-run worktree, ephemeral)"
echo " RESULTS: ${RESULTS_ABS} (persistent batch output)"
echo " PARALLEL: ${PARALLEL_EFFECTIVE} (source: ${PARALLEL_SOURCE})"
echo " RAYON_NUM_THREADS/instance: ${RAYON_NUM_THREADS} (source: ${RAYON_SOURCE})"
echo " Total CPU saturation: ${PARALLEL_EFFECTIVE} × ${RAYON_NUM_THREADS} = $((PARALLEL_EFFECTIVE * RAYON_NUM_THREADS))/${NPROC} cores"
# The batch modes below default AI_GPU_ROLLOUT to false when it is unset
# (gpu-walltime sets it explicitly per iteration), so report that default.
echo " AI_GPU_ROLLOUT: ${AI_GPU_ROLLOUT:-false (default off; opt in via env)}"
echo "============================================================"
# ── Step 1: fetch forge → canonical → per-run worktree ───────────────────────
# Source flow is forge → canonical → worktree. Plum is NOT in this path; agents
# push via ACS (commits-tray on plum pushes every ~5 min). To test a specific
# SHA, set BUILD_REF=<sha> in the environment.
# (The log line separates the ref from the worktree path with " → "; they were
# previously printed run together as one unreadable token.)
echo "[$(date +%H:%M:%S)] fetch origin in canonical, then worktree ${BUILD_REF} → ${SCRATCH_ABS}..."
# One remote shell: verify the canonical clone exists (printing the one-time
# setup commands if not), refresh it, then add a detached worktree at the ref.
ssh "${APRICOT}" "set -euo pipefail; \
test -d '${CANONICAL_ABS}/.git' || { \
echo 'ERROR: canonical checkout missing at ${CANONICAL_ABS}' >&2; \
echo 'one-time setup: git clone http://forge.black.local:3000/magicciv/magicciv.git ${CANONICAL_ABS}' >&2; \
echo 'then: git -C ${CANONICAL_ABS} config core.hooksPath \$HOME/Code/project-buildspace/.hooks' >&2; \
exit 1; \
}; \
git -C '${CANONICAL_ABS}' fetch origin --quiet; \
git -C '${CANONICAL_ABS}' worktree add --detach '${SCRATCH_ABS}' '${BUILD_REF}'"
# Resolve the actual SHA we built (BUILD_REF may be a branch name).
BUILT_SHA="$(ssh "${APRICOT}" "git -C '${SCRATCH_ABS}' rev-parse --short HEAD")"
echo " built SHA: ${BUILT_SHA}"
# ── Step 2: build + deploy via build-gdext.sh ────────────────────────────────
# build-gdext.sh is the canonical build entry point: it runs
# `cargo build --release --target x86_64-unknown-linux-gnu` and then copies
# .local/build/rust/$TARGET/release/libmagic_civ_physics_gdext.so to
# src/game/engine/addons/magic_civ_physics/libmagic_civ_physics.x86_64.so,
# the file name Godot's .gdextension config expects.
GDEXT_TARGET="x86_64-unknown-linux-gnu"
ADDON_DIR="${SCRATCH_ABS}/src/game/engine/addons/magic_civ_physics"
GAME_DIR="${SCRATCH_ABS}/src/game"

echo "[$(date +%H:%M:%S)] build-gdext.sh ${GDEXT_TARGET} (in SCRATCH)..."
# Do NOT append `| tail -N` to the build: ssh would report tail's exit code
# (0) and hide a failed build-gdext.sh. That is how a stale April-16 .so ran
# for 2 hours on 2026-04-17 while bullets 3-5 of p0-32 stayed ✗ with no
# visible warning. The full log is let through on purpose.
BUILD_CMD="set -euo pipefail; cd '${SCRATCH_ABS}/src/simulator' && bash build-gdext.sh ${GDEXT_TARGET}"
ssh "${APRICOT}" "${BUILD_CMD}"

# Drop any macOS .dylib sitting in the addon dir — flatpak Godot on Linux
# shouldn't try to load a macOS binary even though the .gdextension config
# only points at it for macos.* target strings.
ssh "${APRICOT}" "rm -f '${ADDON_DIR}/libmagic_civ_physics.dylib'"

# ── Step 3: populate .godot/global_script_class_cache.cfg via editor pre-pass ──
# A fresh scratch tree has an empty .godot/ cache, and Godot's class_name
# resolution (the `as Weather` / `as SplitPanelContainer` idiom) needs that
# cache filled by an editor-mode scan. Without it headless autoplay cascades
# through "Could not find type X" → "Compilation failed" → turn_manager.gd
# fails to load.
#
# `--editor --quit` opens, imports, scans class_names, writes cache, exits.
# Afterwards the cache file is checked for being non-empty and a ✓ / ⚠ line
# is printed accordingly.
echo "[$(date +%H:%M:%S)] editor pre-pass to populate .godot/ class cache..."
PREPASS_CMD="set -euo pipefail; \
flatpak run --user --filesystem=home --command=godot \
org.godotengine.Godot --headless --editor --quit \
--path '${GAME_DIR}' 2>&1 | tail -5; \
test -s '${GAME_DIR}/.godot/global_script_class_cache.cfg' && \
echo ' ✓ class cache populated' || \
echo ' ⚠ class cache missing — headless autoplay may still cascade'"
ssh "${APRICOT}" "${PREPASS_CMD}"
# ── Step 4: run the batch per MODE ───────────────────────────────────────────
ssh "${APRICOT}" "mkdir -p '${RESULTS_ABS}'"

#######################################
# Run tools/autoplay-batch.sh in the scratch worktree on the RUN host.
# Always sets AI_USE_MCTS=true and forwards PARALLEL and RAYON_NUM_THREADS.
# ssh does not carry the local environment across, so without the explicit
# RAYON_NUM_THREADS the per-instance thread budget computed by the resource
# policy never reached the batch. Env values and arguments are shell-quoted
# before being spliced into the remote command.
# Globals:   APRICOT, SCRATCH_ABS, RESULTS_ABS, PARALLEL, RAYON_NUM_THREADS (read)
# Arguments: $1 - results sub-directory name under RESULTS_ABS
#            $2 - number of trailing output lines to show (tail -N)
#            $3 - seeds, $4 - turns
#            $5… - extra KEY=VALUE environment assignments for the batch
# Returns:   exit status of the remote pipeline (remote shell uses pipefail)
#######################################
run_autoplay_batch() {
  local out_subdir="$1" tail_lines="$2" seeds="$3" turns="$4"
  shift 4
  local remote_env="AI_USE_MCTS=true" kv
  for kv in "$@" "PARALLEL=${PARALLEL}" "RAYON_NUM_THREADS=${RAYON_NUM_THREADS}"; do
    # Quote only the value; the NAME= prefix must stay bare to be an assignment.
    remote_env+=" ${kv%%=*}=$(printf '%q' "${kv#*=}")"
  done
  ssh "${APRICOT}" "set -euo pipefail; cd '${SCRATCH_ABS}' && \
${remote_env} bash tools/autoplay-batch.sh $(printf '%q' "${seeds}") $(printf '%q' "${turns}") $(printf '%q' "${RESULTS_ABS}/${out_subdir}") 2>&1 | tail -${tail_lines}"
}

# AI_GPU_ROLLOUT defaults to false everywhere except gpu-walltime. The GPU
# integration (p0-20 task #10) is parity-verified on isolated rollouts, but
# enabling it in a 2-player smoke produced a deterministic
# "P0 always wins at T11-T18, P1 never founds" regression on 2026-04-18.
# Opt in via env override; gpu-walltime flips it per iteration as its
# explicit comparison.
GPU_ENV="AI_GPU_ROLLOUT=${AI_GPU_ROLLOUT:-false}"

case "${MODE}" in
  smoke)
    SEEDS="${1:-10}"; TURNS="${2:-300}"
    # p2-44b: forward MC_AI_* diagnostic envs so instrumentation prints
    # surface in game.log. MC_AI_ENV is kept only for the log line.
    MC_AI_ENV=""
    MC_AI_KVS=()
    for var in MC_AI_PROMOTION_DEBUG; do
      val="${!var:-}"
      if [[ -n "${val}" ]]; then
        MC_AI_ENV="${MC_AI_ENV} ${var}=${val}"
        MC_AI_KVS+=("${var}=${val}")
      fi
    done
    echo "[$(date +%H:%M:%S)] smoke batch: ${SEEDS} seeds T${TURNS} PARALLEL=${PARALLEL} ${GPU_ENV}${MC_AI_ENV}"
    run_autoplay_batch "smoke" 30 "${SEEDS}" "${TURNS}" \
      "${GPU_ENV}" ${MC_AI_KVS[@]+"${MC_AI_KVS[@]}"}
    ;;
  clan)
    CLAN="${1:?usage: apricot-run.sh clan <clan_id> [seeds] [turns]}"
    SEEDS="${2:-10}"; TURNS="${3:-300}"
    echo "[$(date +%H:%M:%S)] clan=${CLAN} batch: ${SEEDS} seeds T${TURNS} PARALLEL=${PARALLEL} ${GPU_ENV}"
    run_autoplay_batch "clan-${CLAN}" 30 "${SEEDS}" "${TURNS}" \
      "AI_PIN_PERSONALITY=${CLAN}" "${GPU_ENV}"
    ;;
  difficulty)
    DIFF_TIER="${1:?usage: apricot-run.sh difficulty <easy|normal|hard|insane> [seeds] [turns]}"
    SEEDS="${2:-10}"; TURNS="${3:-300}"
    echo "[$(date +%H:%M:%S)] difficulty=${DIFF_TIER} batch: ${SEEDS} seeds T${TURNS} PARALLEL=${PARALLEL} ${GPU_ENV}"
    # AI_DIFFICULTY_P0 + AI_DIFFICULTY_P1 apply the modifier to BOTH players
    # (including the human-slot player 0 which is_human=true). This is
    # required for symmetric Easy-vs-Easy / Hard-vs-Hard tier_peak differentiation.
    run_autoplay_batch "difficulty-${DIFF_TIER}" 30 "${SEEDS}" "${TURNS}" \
      "AI_DIFFICULTY=${DIFF_TIER}" \
      "AI_DIFFICULTY_P0=${DIFF_TIER}" "AI_DIFFICULTY_P1=${DIFF_TIER}" \
      "${GPU_ENV}"
    ;;
  difficulty-asym)
    P0_TIER="${1:?usage: apricot-run.sh difficulty-asym <p0-tier> <p1-tier> [seeds] [turns]}"
    P1_TIER="${2:?usage: apricot-run.sh difficulty-asym <p0-tier> <p1-tier> [seeds] [turns]}"
    SEEDS="${3:-10}"; TURNS="${4:-300}"
    echo "[$(date +%H:%M:%S)] difficulty-asym p0=${P0_TIER} p1=${P1_TIER}: ${SEEDS} seeds T${TURNS}"
    run_autoplay_batch "difficulty-asym-${P0_TIER}-vs-${P1_TIER}" 30 "${SEEDS}" "${TURNS}" \
      "AI_DIFFICULTY_P0=${P0_TIER}" "AI_DIFFICULTY_P1=${P1_TIER}" \
      "${GPU_ENV}"
    ;;
  clan-priors)
    # Like `clan` but with AI_MCTS_PRIORS=true — used to verify p0-38
    # tree-shape divergence across 5 clan personalities.
    CLAN="${1:?usage: apricot-run.sh clan-priors <clan_id> [seeds] [turns]}"
    SEEDS="${2:-10}"; TURNS="${3:-300}"
    echo "[$(date +%H:%M:%S)] clan-priors=${CLAN} batch: ${SEEDS} seeds T${TURNS} PARALLEL=${PARALLEL} priors=true"
    run_autoplay_batch "clan-priors-${CLAN}" 30 "${SEEDS}" "${TURNS}" \
      "AI_PIN_PERSONALITY=${CLAN}" "AI_MCTS_PRIORS=true" "${GPU_ENV}"
    ;;
  gpu-walltime)
    SEEDS="${1:-10}"; TURNS="${2:-300}"
    echo "[$(date +%H:%M:%S)] GPU wall-time comparison: ${SEEDS} seeds T${TURNS}"
    # Same batch twice, GPU rollouts on then off, into gpu-true/ and gpu-false/.
    for GPU in true false; do
      echo " --- AI_GPU_ROLLOUT=${GPU} ---"
      run_autoplay_batch "gpu-${GPU}" 10 "${SEEDS}" "${TURNS}" \
        "AI_GPU_ROLLOUT=${GPU}"
    done
    ;;
  matchup-grid)
    # Run all C(5,2)=10 clan-pair matchups serially (pairs run one at a time;
    # seeds within a pair use PARALLEL concurrency). Uses the scratch-resident
    # binary so we never touch ~/Code on the RUN host.
    SEEDS_PER_PAIR="${1:-5}"; TURNS="${2:-300}"
    REMOTE_GRID="${RESULTS_ABS}/matchup-grid"
    echo "[$(date +%H:%M:%S)] matchup-grid: ${SEEDS_PER_PAIR} seeds/pair T${TURNS} PARALLEL=${PARALLEL}"
    ssh "${APRICOT}" "set -euo pipefail; mkdir -p '${REMOTE_GRID}'; cd '${SCRATCH_ABS}' && \
AI_USE_MCTS=true PARALLEL=${PARALLEL} RAYON_NUM_THREADS=${RAYON_NUM_THREADS} \
COUNT=${SEEDS_PER_PAIR} TURN_LIMIT=${TURNS} \
MATCHUP_OUTPUT='${REMOTE_GRID}' \
bash tools/matchup-grid.sh 2>&1 | tail -40"
    ;;
  huge-map-5clan)
    SEEDS="${1:-5}"; TURNS="${2:-300}"
    REMOTE_HUGE="${RESULTS_ABS}/huge-map-5clan"
    echo "[$(date +%H:%M:%S)] huge-map-5clan: ${SEEDS} seeds T${TURNS} PARALLEL=${PARALLEL}"
    ssh "${APRICOT}" "set -euo pipefail; mkdir -p '${REMOTE_HUGE}'; cd '${SCRATCH_ABS}' && \
AI_USE_MCTS=true PARALLEL=${PARALLEL} RAYON_NUM_THREADS=${RAYON_NUM_THREADS} \
COUNT=${SEEDS} TURN_LIMIT=${TURNS} \
HUGE_OUTPUT='${REMOTE_HUGE}' \
bash tools/huge-map-5clan.sh 2>&1 | tail -40"
    ;;
  ai-quality-baseline|ai-quality-baseline-pre-c|ai-quality-baseline-post-c)
    # p0-20 Phase C — empirical AI-quality baseline.
    # Runs `difficulty normal` + `difficulty hard` back-to-back so
    # pre-C / post-C comparison covers SEEDS×2 games per build with one
    # build+worktree setup. Trailing `<tier1> <tier2>` are optional and
    # default to `normal hard`.
    SEEDS="${1:-50}"; TURNS="${2:-300}"
    TIER1="${3:-normal}"; TIER2="${4:-hard}"
    echo "[$(date +%H:%M:%S)] ai-quality-baseline (${MODE}): ${SEEDS} seeds × ${TIER1}+${TIER2}, T${TURNS}, PARALLEL=${PARALLEL}"
    for TIER in "${TIER1}" "${TIER2}"; do
      echo " --- baseline tier=${TIER} ---"
      run_autoplay_batch "baseline-${TIER}" 20 "${SEEDS}" "${TURNS}" \
        "AI_DIFFICULTY=${TIER}" \
        "AI_DIFFICULTY_P0=${TIER}" "AI_DIFFICULTY_P1=${TIER}" \
        "${GPU_ENV}"
    done
    ;;
  *)
    echo "ERROR: unknown mode '${MODE}'" >&2
    exit 2
    ;;
esac
# ── Step 5: fetch results summary back to EDIT ───────────────────────────────
# Best-effort copy: a failed scp only produces a warning, the run continues.
LOCAL_RESULTS="${PROJECT_DIR}/.local/iter/apricot-${STAMP}"
mkdir -p "${LOCAL_RESULTS}"
echo "[$(date +%H:%M:%S)] fetch verdict/summary to ${LOCAL_RESULTS}..."
if ! scp -r "${APRICOT}:${RESULTS_ABS}/" "${LOCAL_RESULTS}/" 2>/dev/null; then
  echo "WARN: scp returned non-zero; check manually on ${APRICOT}:${RESULTS_ABS}"
fi

# ── Step 6: remove the per-run worktree ──────────────────────────────────────
# Only the working tree (and the cargo target dir inside it) goes away.
# Canonical .git/ + objects stay for the next run, and the RESULTS dir under
# ~/.cache/mc-batches/ is not touched. Falls back to rm -rf if git refuses.
echo "[$(date +%H:%M:%S)] remove worktree ${SCRATCH_ABS}..."
ssh "${APRICOT}" "git -C '${CANONICAL_ABS}' worktree remove --force '${SCRATCH_ABS}' 2>&1 || \
rm -rf '${SCRATCH_ABS}'"

# ── Step 7: prune old local copies — keep only the 3 most recent ─────────────
ITER_ROOT="${PROJECT_DIR}/.local/iter"
if [[ -d "${ITER_ROOT}" ]]; then
  # apricot-* dirs sorted newest-first by mtime; everything from the 4th
  # entry onward is stale.
  mapfile -t OLD_RUNS < <(ls -1dt "${ITER_ROOT}"/apricot-* 2>/dev/null | tail -n +4)
  if [[ "${#OLD_RUNS[@]}" -gt 0 ]]; then
    echo "[$(date +%H:%M:%S)] pruning ${#OLD_RUNS[@]} old local run(s) (keeping 3 newest)..."
    for stale_run in "${OLD_RUNS[@]}"; do
      echo " rm -rf ${stale_run}"
      rm -rf "${stale_run}"
    done
  fi
fi

# Closing summary of what was built and where everything ended up.
cat <<SUMMARY
============================================================
DONE. Built ${BUILT_SHA} from ${BUILD_REF}.
 Scratch worktree at ${APRICOT}:${SCRATCH_ABS} — REMOVED.
 Results at ${APRICOT}:${RESULTS_ABS} (persistent, .cache).
 Local copy at ${LOCAL_RESULTS}
 Canonical at ${APRICOT}:${CANONICAL_ABS} (kept for next run).
============================================================
SUMMARY