#!/usr/bin/env bash
# Distributed test/train dispatch — fan the iteration loop across the DigitalOcean
# test fleet. Sourced by ./run (defines cmd_dist_*). Auto-registered via the
# cmd_<verb>_<target> name-dispatch, so no edit to the top-level `run` is needed.
#
#   ./run dist:up <workers> [size] [region]                 spin the fleet up
#   ./run dist:sim <games> [turn_limit] [--destroy-after]   fan a sim batch across it
#   ./run dist:train <total_steps> [--destroy-after]        fan an RL sweep across it
#   ./run dist:down                                         tear it down (zero cost)
#
# Requires: TF_VAR_do_token in env, terraform on PATH, and a coordinator with
# GNU coreutils (autoplay-batch.sh uses `realpath -m`).
# Terraform root module for the test fleet, relative to the repository root.
_DIST_TF_DIR_REL="infra/terraform/test-fleet"
# Print the absolute path of the directory two levels above this file.
# Runs in a subshell so the caller's working directory is untouched.
# Returns non-zero (and prints nothing) if that directory cannot be entered.
_dist_repo_root() {
  (cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)
}
# Run terraform against the fleet's root module.
# Arguments: passed through verbatim to terraform (subcommand + flags).
# Returns:   terraform's exit status, or 1 if the repo root cannot be resolved.
_dist_tf() {
  local root
  root="$(_dist_repo_root)" || return 1
  terraform -chdir="$root/$_DIST_TF_DIR_REL" "$@"
}
# Echo one "<user>@<ip>" per line from the inventory, skipping comments/blanks.
# Arguments: $1 - path to the inventory file
# Outputs:   the remaining lines on stdout (nothing if the file is missing)
# Returns:   always 0 — an unreadable or empty inventory just yields no hosts
_dist_read_hosts() {
  local inv="${1:-}"
  # [[:space:]] instead of \s: \s is a GNU/BSD extension, not POSIX ERE.
  grep -vE '^[[:space:]]*(#|$)' "$inv" 2>/dev/null || true
}
# Block until each worker's cloud-init finishes — it copies the fleet key to the
# build user and git-pulls. DO's boot agent install delays runcmd 1-3 min, so the
# build user isn't ssh-able until then. We ssh as root (authorized immediately) to wait.
#
# Per host: probe up to 36 times, 5 s apart, then block on `cloud-init status --wait`.
# Outputs: one progress line per host, ending in "ready" or "UNREACHABLE".
# Returns: 0 if there is no inventory or every host became ready; 1 if any
#          host never answered (previously this was reported as "ready").
_dist_wait_ready() {
  local root inv host ip attempt up unreachable=0
  local -a ssh_opts=(-n -o BatchMode=yes -o StrictHostKeyChecking=accept-new
    -o ConnectTimeout=8 -i "$HOME/.ssh/id_mc_fleet")
  root="$(_dist_repo_root)"
  inv="$root/.local/fleet/inventory"
  [ -f "$inv" ] || return 0
  while IFS= read -r host; do
    ip="${host#*@}"
    printf '  waiting for %s cloud-init... ' "$ip"
    up=false
    for ((attempt = 1; attempt <= 36; attempt++)); do
      if ssh "${ssh_opts[@]}" "root@$ip" true 2>/dev/null; then
        up=true
        break
      fi
      sleep 5
    done
    # The remote command always exits 0, so a failure here means the
    # connection itself failed.
    if $up && ssh "${ssh_opts[@]}" "root@$ip" \
      'cloud-init status --wait >/dev/null 2>&1 || true' 2>/dev/null; then
      echo "ready"
    else
      echo "UNREACHABLE"
      unreachable=$((unreachable + 1))
    fi
  done < <(_dist_read_hosts "$inv")
  [ "$unreachable" -eq 0 ]
}
# ./run dist — print the usage summary for every dist:* subcommand.
# The heredoc delimiter is quoted, so `$0.40` and friends are printed literally.
cmd_dist() {
  cat <<'EOF'
Distributed test/train fleet (DigitalOcean). Set TF_VAR_do_token first.
  ./run dist:check                                  offline: fmt + validate + mocked test (no token/spend)
  ./run dist:image [--cold]                         (re)build golden image — incremental by default (~3-8min vs ~20 cold)
  ./run dist:prune [keep=2]                         delete superseded golden snapshots (~$0.40/mo each)
  ./run dist:up <workers> [size] [region]           e.g. ./run dist:up 10
  ./run dist:sim <games> [turn_limit] [--destroy-after]
  ./run dist:train <total_steps> [--destroy-after]
  ./run dist:test                                   cargo test --workspace on a worker
  ./run dist:build                                  cargo build + wasm on a worker (wasm rsync'd back)
  ./run dist:publish                                build once → upload .so/wasm to the artifact Space (keyed by sha)
  ./run dist:fetch                                  download the prebuilt .so for HEAD's sha (skip recompile)
  ./run dist:sync [ref]                             git pull → fetch prebuilt .so if published, else build
  ./run dist:models {push <src> <name>|pull <name> <dest>|ls}   share RL models via the Space
  ./run dist:render <res://scene.tscn> <out.png>    render a proof scene (software weston, no GPU) → png
  ./run dist:down
EOF
}
# ./run dist:check
# Offline IaC verification — no DigitalOcean token, no API, no servers, no cost.
# fmt (style) + validate (schema typecheck) + test (mocked-provider behaviour).
# Returns: 0 if every step passes; 1 at the first failing step.
cmd_dist_check() {
  local root dir
  root="$(_dist_repo_root)"
  dir="$root/$_DIST_TF_DIR_REL"
  echo "== terraform fmt =="
  # The hint carries -recursive so it fixes the same file set the check inspects.
  terraform -chdir="$dir" fmt -check -recursive || {
    echo "fmt: run 'terraform -chdir=$dir fmt -recursive'" >&2
    return 1
  }
  echo "== terraform init (providers only) =="
  terraform -chdir="$dir" init -backend=false -input=false >/dev/null || return 1
  echo "== terraform validate (schema typecheck) =="
  terraform -chdir="$dir" validate || return 1
  echo "== terraform test (mocked digitalocean) =="
  terraform -chdir="$dir" test || return 1
  echo "dist:check OK — config is valid, no resources touched."
}
# ./run dist:image [--cold]
# (Re)build the golden image. INCREMENTAL by default: builds FROM the newest
# mc-golden snapshot, so provision.sh (idempotent) only redoes changed work
# (~3-8 min). --cold builds from stock Ubuntu (~20 min) — resets accumulated
# layer cruft; run occasionally. Needs ~/.vault/{do_pat_mc,mc_forge_creds}
# and ~/.ssh/id_mc_fleet.pub.
#
# Returns: 0 on a successful packer build; 1 if a credential is missing or
#          packer init/build fails (the failure used to be masked by the
#          trailing tip message).
cmd_dist_image() {
  local cold=false a
  for a in "$@"; do
    if [ "$a" = "--cold" ]; then cold=true; fi
  done

  local root pat pubkey
  root="$(_dist_repo_root)"
  pat="$(cat ~/.vault/do_pat_mc 2>/dev/null)"
  [ -n "$pat" ] || { echo "no ~/.vault/do_pat_mc" >&2; return 1; }

  # Declared local so the sourced forge credentials do not outlive this call.
  local ADMIN_USER="" ADMIN_PASS="" FORGE_IP=""
  [ -r ~/.vault/mc_forge_creds ] || { echo "no ~/.vault/mc_forge_creds" >&2; return 1; }
  # shellcheck disable=SC1090
  . ~/.vault/mc_forge_creds
  if [ -z "$ADMIN_USER" ] || [ -z "$ADMIN_PASS" ] || [ -z "$FORGE_IP" ]; then
    echo "~/.vault/mc_forge_creds must set ADMIN_USER, ADMIN_PASS and FORGE_IP" >&2
    return 1
  fi
  pubkey="$(cat ~/.ssh/id_mc_fleet.pub 2>/dev/null)"
  [ -n "$pubkey" ] || { echo "no ~/.ssh/id_mc_fleet.pub" >&2; return 1; }

  # local -x: exported to packer (a child process) but dropped on return, so
  # the token and forge password never linger in the sourcing shell.
  local -x DIGITALOCEAN_TOKEN="$pat"
  local -x PKR_VAR_git_remote="http://${ADMIN_USER}:${ADMIN_PASS}@${FORGE_IP}:3000/mcadmin/magicciv.git"
  local -x PKR_VAR_fleet_pubkey="$pubkey"

  local base="ubuntu-24-04-x64" prev
  if ! $cold; then
    # Newest snapshot whose name starts with mc-golden; empty if none exists
    # (or if the API call / JSON parse fails — that also falls back to cold).
    prev="$(curl -s -H "Authorization: Bearer $pat" "https://api.digitalocean.com/v2/snapshots?resource_type=droplet&per_page=200" \
      | python3 -c "import sys,json;s=[x for x in json.load(sys.stdin)['snapshots'] if x['name'].startswith('mc-golden')];s.sort(key=lambda x:x['created_at']);print(s[-1]['id'] if s else '')" 2>/dev/null)"
    if [ -n "$prev" ]; then
      base="$prev"
      echo "INCREMENTAL rebuild from snapshot $base (pass --cold for a full rebuild)"
    else
      echo "no prior golden — cold build"
    fi
  else
    echo "COLD rebuild from $base"
  fi
  local -x PKR_VAR_base_image="$base"

  (cd "$root/infra/packer" && packer init golden-image.pkr.hcl >/dev/null && packer build golden-image.pkr.hcl) || {
    echo "dist:image FAILED — packer errored (see above)" >&2
    return 1
  }
  echo "tip: each rebuild leaves a snapshot (~\$0.40/mo) — './run dist:prune' deletes superseded ones."
}
# ./run dist:prune [keep=2]
# Delete superseded golden snapshots, keeping the newest N (default 2).
# Arguments: $1 - number of newest mc-golden snapshots to keep; integer >= 1
# Returns:   0 after pruning (or when there is nothing to prune); 1 on a bad
#            argument or a missing ~/.vault/do_pat_mc.
cmd_dist_prune() {
  local keep="${1:-2}" pat old id
  # Validate before use: keep used to be spliced into the Python source, so a
  # non-number failed silently and 0 produced s[:-0], which selects nothing.
  [[ "$keep" =~ ^[1-9][0-9]*$ ]] || {
    echo "usage: ./run dist:prune [keep] — keep must be an integer >= 1" >&2
    return 1
  }
  pat="$(cat ~/.vault/do_pat_mc 2>/dev/null)"
  [ -n "$pat" ] || { echo "no ~/.vault/do_pat_mc" >&2; return 1; }
  # keep reaches Python as argv, never as code.
  old="$(curl -s -H "Authorization: Bearer $pat" "https://api.digitalocean.com/v2/snapshots?resource_type=droplet&per_page=200" \
    | python3 -c "import sys,json;keep=int(sys.argv[1]);s=[x for x in json.load(sys.stdin)['snapshots'] if x['name'].startswith('mc-golden')];s.sort(key=lambda x:x['created_at']);[print(x['id']) for x in s[:-keep]]" "$keep" 2>/dev/null)"
  [ -n "$old" ] || { echo "nothing to prune (<= $keep golden snapshots)"; return 0; }
  while IFS= read -r id; do
    [ -n "$id" ] || continue
    curl -s -o /dev/null -w "pruned golden snapshot $id: http %{http_code}\n" \
      -X DELETE -H "Authorization: Bearer $pat" "https://api.digitalocean.com/v2/snapshots/$id"
  done <<<"$old"
}
# ./run dist:up <workers> [size] [region]
# Apply the fleet terraform with the requested worker count, then wait for
# every worker's cloud-init to finish.
# Arguments: $1 - worker count (non-negative integer)
#            $2 - optional droplet size, passed as -var size=…
#            $3 - optional region, passed as -var region=…
# Returns:   0 when the fleet is up and ready; 1 on a usage error, a missing
#            TF_VAR_do_token, or a terraform / readiness failure.
cmd_dist_up() {
  local n="${1:-}"
  [[ "$n" =~ ^[0-9]+$ ]] || { echo "usage: ./run dist:up <workers> [size] [region]" >&2; return 1; }
  # Explicit check instead of ${VAR:?…}: that form exits the whole
  # (non-interactive) shell that sourced this file rather than returning.
  [ -n "${TF_VAR_do_token:-}" ] || {
    echo "export TF_VAR_do_token=<DigitalOcean API token> first" >&2
    return 1
  }
  local -a args=(-auto-approve -var "workers=$n")
  if [ -n "${2:-}" ]; then args+=(-var "size=$2"); fi
  if [ -n "${3:-}" ]; then args+=(-var "region=$3"); fi
  _dist_tf init -input=false >/dev/null || {
    echo "dist:up FAILED — terraform init errored (see above)" >&2
    return 1
  }
  _dist_tf apply "${args[@]}" || {
    echo "dist:up FAILED — terraform apply errored (see above)" >&2
    return 1
  }
  echo "fleet up: $n worker(s) — waiting for cloud-init before they're usable..."
  _dist_wait_ready || {
    echo "dist:up: some worker(s) never became reachable" >&2
    return 1
  }
  echo "fleet ready. inventory: $(_dist_repo_root)/.local/fleet/inventory"
}
# ./run dist:down
# Scale the fleet to zero workers via terraform.
# Returns: 0 once the apply succeeds; 1 if TF_VAR_do_token is unset or the
#          apply fails (previously "fleet down" was printed regardless).
cmd_dist_down() {
  [ -n "${TF_VAR_do_token:-}" ] || {
    echo "export TF_VAR_do_token=<DigitalOcean API token> first" >&2
    return 1
  }
  _dist_tf apply -auto-approve -var "workers=0" || {
    echo "dist:down FAILED — terraform apply errored; workers may still be running" >&2
    return 1
  }
  # \$ keeps the price literal; a bare $0 expands to the script name.
  echo "fleet down (workers=0): zero compute cost, snapshot only (~\$0.40/mo)."
}
# ./run dist:sim <total_games> [turn_limit] [--destroy-after]
# Split a batch of games into contiguous seed ranges, one per inventory host,
# and run tools/autoplay-batch.sh for each range in parallel. Per-worker
# output goes to <repo>/.local/iter/<timestamp>/dispatch_worker_<i>.log.
#
# Arguments: total_games  - positive integer
#            turn_limit   - non-negative integer, default 300
#            --destroy-after may appear anywhere; it is no longer mistaken for
#            turn_limit when turn_limit is omitted.
# Returns:   0 if every worker batch succeeded; 1 on a usage/inventory error
#            or if any batch failed.
cmd_dist_sim() {
  local destroy=false a
  local -a pos=()
  for a in "$@"; do
    if [ "$a" = "--destroy-after" ]; then destroy=true; else pos+=("$a"); fi
  done
  local total="${pos[0]:-}" turn="${pos[1]:-300}"
  # No leading zero on total: $(( )) would read e.g. 08 as invalid octal.
  if ! [[ "$total" =~ ^[1-9][0-9]*$ && "$turn" =~ ^[0-9]+$ ]]; then
    echo "usage: ./run dist:sim <total_games> [turn_limit] [--destroy-after]" >&2
    return 1
  fi

  local root inv line
  root="$(_dist_repo_root)"
  inv="$root/.local/fleet/inventory"
  [ -f "$inv" ] || { echo "no inventory at $inv — run ./run dist:up <N> first" >&2; return 1; }
  local -a hosts=()
  while IFS= read -r line; do hosts+=("$line"); done < <(_dist_read_hosts "$inv")
  local n=${#hosts[@]}
  [ "$n" -gt 0 ] || { echo "inventory empty — fleet is down" >&2; return 1; }

  local stamp results shard
  stamp="$(date +%Y%m%d_%H%M%S)"
  results="$root/.local/iter/$stamp"
  mkdir -p "$results" || return 1
  shard=$(((total + n - 1) / n)) # ceil(total / n)
  echo "distributing $total game(s) over $n worker(s): ~$shard each, turn_limit=$turn"
  echo "results → $results"

  local -a pids=()
  local i=0 host offset cnt cores
  for host in "${hosts[@]}"; do
    offset=$((i * shard))
    cnt=$shard
    if ((offset + cnt > total)); then cnt=$((total - offset)); fi
    # More workers than games: the remaining hosts get nothing.
    ((cnt > 0)) || break
    # Parallelism follows the worker's core count; fall back to 8 if the probe fails.
    cores="$(ssh -n -o BatchMode=yes -o StrictHostKeyChecking=accept-new "$host" nproc 2>/dev/null || echo 8)"
    echo "  [$host] seeds $((offset + 1))..$((offset + cnt)) PARALLEL=$cores"
    AUTOPLAY_HOST="$host" SEED_OFFSET="$offset" PARALLEL="$cores" \
      bash "$root/tools/autoplay-batch.sh" "$cnt" "$turn" "$results" \
      >"$results/dispatch_worker_${i}.log" 2>&1 &
    pids+=($!)
    i=$((i + 1))
  done

  local fail=0 p
  for p in "${pids[@]}"; do wait "$p" || fail=$((fail + 1)); done
  local produced
  produced="$(find "$results" -name turn_stats.jsonl -type f 2>/dev/null | wc -l | tr -d ' ')"
  echo "----------------------------------------------------------------"
  echo "distributed sim done: $produced game(s) produced turn_stats under $results"
  if [ "$fail" -ne 0 ]; then
    echo "WARNING: $fail worker batch(es) errored — see $results/dispatch_worker_*.log" >&2
  fi
  if $destroy; then
    echo "--destroy-after → tearing down"
    cmd_dist_down
  fi
  [ "$fail" -eq 0 ]
}
# ./run dist:train <total_steps> [--destroy-after]
# v1 blocking sweep: one training run per worker (distinct seed + run-name),
# then pull the models back. Detached orchestration is the documented follow-up.
#
# Arguments: total_steps - non-negative integer, default 1000000
#            --destroy-after may appear anywhere on the command line.
# Per worker i: seed is 42+i, run name is dist-<timestamp>-w<i>, log goes to
# <repo>/.local/train/<timestamp>/train_worker_<i>.log.
# Returns:   0 if every run succeeded; 1 on a usage/inventory error or if any
#            run failed.
cmd_dist_train() {
  local destroy=false a
  local -a pos=()
  for a in "$@"; do
    if [ "$a" = "--destroy-after" ]; then destroy=true; else pos+=("$a"); fi
  done
  local steps="${pos[0]:-1000000}"
  [[ "$steps" =~ ^[0-9]+$ ]] || {
    echo "usage: ./run dist:train <total_steps> [--destroy-after]" >&2
    return 1
  }

  local root inv line
  root="$(_dist_repo_root)"
  inv="$root/.local/fleet/inventory"
  [ -f "$inv" ] || { echo "no inventory at $inv — run ./run dist:up <N> first" >&2; return 1; }
  local -a hosts=()
  while IFS= read -r line; do hosts+=("$line"); done < <(_dist_read_hosts "$inv")
  local n=${#hosts[@]}
  [ "$n" -gt 0 ] || { echo "inventory empty — fleet is down" >&2; return 1; }

  local stamp results
  stamp="$(date +%Y%m%d_%H%M%S)"
  results="$root/.local/train/$stamp"
  mkdir -p "$results" || return 1
  echo "fanning $n training run(s) × $steps steps (CPU). results → $results"

  local repo_remote="Code/@projects/@magic-civilization"
  local -a pids=()
  local i=0 host seed run
  for host in "${hosts[@]}"; do
    seed=$((42 + i))
    run="dist-${stamp}-w${i}"
    echo "  [$host] run=$run seed=$seed"
    # $run/$seed/$steps expand locally; ~ is expanded by the remote shell.
    ssh -o BatchMode=yes -o StrictHostKeyChecking=accept-new "$host" \
      "cd ~/$repo_remote && python3 -m tooling.rl_self_play.train --run-name '$run' --seed $seed --total-steps $steps --device cpu" \
      >"$results/train_worker_${i}.log" 2>&1 &
    pids+=($!)
    i=$((i + 1))
  done

  local fail=0 p
  for p in "${pids[@]}"; do wait "$p" || fail=$((fail + 1)); done

  # Pull each worker's model dir back.
  i=0
  for host in "${hosts[@]}"; do
    run="dist-${stamp}-w${i}"
    if ! rsync -az "$host:~/$repo_remote/tooling/rl_self_play/models/$run" "$results/" 2>/dev/null; then
      echo "  note: no model dir for $run on $host (check $results/train_worker_${i}.log)"
    fi
    i=$((i + 1))
  done

  echo "----------------------------------------------------------------"
  echo "distributed train done under $results"
  if [ "$fail" -ne 0 ]; then
    echo "WARNING: $fail run(s) errored — see $results/train_worker_*.log" >&2
  fi
  if $destroy; then
    echo "--destroy-after → tearing down"
    cmd_dist_down
  fi
  [ "$fail" -eq 0 ]
}
# ── compute offload (single worker) ──────────────────────────────────────────
# Run heavy build/test compute on a DO worker instead of plum (M2 Air). Workers
# already carry the toolchain (golden image) + repo (cloud-init git pull).
# Print the first live host ("<user>@<ip>") from the fleet inventory.
# Returns: 0 with the host on stdout; 1 (no output) if the inventory file is
#          missing or lists no host.
_dist_first_host() {
  local inv h
  inv="$(_dist_repo_root)/.local/fleet/inventory"
  [ -f "$inv" ] || return 1
  h="$(_dist_read_hosts "$inv" | head -n 1)"
  [ -n "$h" ] || return 1 # inventory present but no live host (e.g. "fleet is down")
  printf '%s\n' "$h"
}
# ./run dist:sync [ref]
# Pull the given ref on every live worker, then make the GDExtension current:
# fetch the prebuilt .so for that sha from the artifact Space if it exists
# (seconds), else build it. So a mid-session code change reaches the fleet
# without an image rebuild, and N workers share one published build.
#
# Arguments: $1 - git ref to sync to (default: main)
# Returns:   0 if every worker synced; 1 if there is no inventory, no live
#            host, an unusable ref, or any worker failed.
cmd_dist_sync() {
  local ref="${1:-main}"
  local root inv host senv line
  # The ref is embedded in a single-quoted word of the remote command line.
  case "$ref" in
    *\'*) echo "dist:sync: ref must not contain a single quote" >&2; return 1 ;;
  esac
  root="$(_dist_repo_root)"
  inv="$root/.local/fleet/inventory"
  [ -f "$inv" ] || { echo "no fleet — run ./run dist:up <N> first" >&2; return 1; }
  local -a hosts=()
  while IFS= read -r line; do hosts+=("$line"); done < <(_dist_read_hosts "$inv")
  # Without this, an empty inventory reported "synced all workers".
  [ "${#hosts[@]}" -gt 0 ] || { echo "inventory empty — fleet is down" >&2; return 1; }
  senv="$(_dist_spaces_env 2>/dev/null || true)" # empty → workers just build

  local -a pids=()
  local p fail=0
  for host in "${hosts[@]}"; do
    echo "[$host] sync → $ref (fetch prebuilt .so, else build)"
    # Quoted heredoc: the script reaches the worker verbatim; SPACE, SO_PATH
    # and REF are expanded there from the env prefix on the ssh command line.
    ssh -o BatchMode=yes -o StrictHostKeyChecking=accept-new -i "$HOME/.ssh/id_mc_fleet" "$host" \
      "$senv SPACE='$_DIST_SPACE' SO_PATH='$_DIST_SO_PATH' REF='$ref' bash -s" <<'REMOTE' &
set -e
cd ~/Code/@projects/@magic-civilization
git fetch --depth=1 origin "$REF" && git reset --hard FETCH_HEAD
SHA=$(git rev-parse HEAD)
. ~/.cargo/env
if [ -n "${RCLONE_S3_ACCESS_KEY_ID:-}" ] && rclone copyto ":s3:$SPACE/builds/$SHA/libmagic_civ_physics.x86_64.so" "$SO_PATH" 2>/dev/null; then
  echo "[$SHA] fetched prebuilt .so (no rebuild)"
else
  ( cd src/simulator && bash build-gdext.sh ) && echo "[$SHA] built .so (cache miss)"
fi
REMOTE
    pids+=($!)
  done
  for p in "${pids[@]}"; do wait "$p" || fail=$((fail + 1)); done
  if [ "$fail" -ne 0 ]; then
    echo "$fail worker(s) failed sync" >&2
    return 1
  fi
  echo "synced all workers to $ref"
}
# ./run dist:test
# Offload the Rust test suite to one fast worker (slow on the M2 Air).
# Runs `cargo nextest run --workspace` when cargo-nextest is installed on the
# worker, otherwise `cargo test --workspace`.
# Returns: the remote test status, or 1 if no live worker is available.
cmd_dist_test() {
  local host repo
  host="$(_dist_first_host)" || { echo "no fleet — run ./run dist:up 1 c-8 first" >&2; return 1; }
  repo="Code/@projects/@magic-civilization"
  echo "running cargo tests on $host ..."
  # Double-quoted on purpose: $repo expands locally, ~ expands on the worker.
  ssh -n -o BatchMode=yes -o StrictHostKeyChecking=accept-new -i "$HOME/.ssh/id_mc_fleet" "$host" "
set -e
cd ~/$repo/src/simulator && . ~/.cargo/env
if command -v cargo-nextest >/dev/null 2>&1; then cargo nextest run --workspace; else cargo test --workspace; fi
"
}
# ./run dist:build
# Offload the workspace build for fast compile feedback, and bring back the
# platform-independent WASM artifact. The native .so is linux-only and stays
# on the worker (plum builds its own macOS .dylib locally).
# Returns: 0 after a successful remote build (even if no wasm could be
#          fetched — that only prints a note); 1 if there is no live worker
#          or the remote build fails.
cmd_dist_build() {
  local host root repo
  host="$(_dist_first_host)" || { echo "no fleet — run ./run dist:up 1 first" >&2; return 1; }
  root="$(_dist_repo_root)"
  repo="Code/@projects/@magic-civilization"
  echo "building workspace + wasm on $host ..."
  # Double-quoted on purpose: $repo expands locally, ~ expands on the worker.
  ssh -n -o BatchMode=yes -o StrictHostKeyChecking=accept-new -i "$HOME/.ssh/id_mc_fleet" "$host" "
set -e
cd ~/$repo/src/simulator && . ~/.cargo/env
cargo build --workspace
bash build-wasm.sh
" || {
    # Stop here: fetching after a failed build would return a stale artifact.
    echo "dist:build FAILED — remote build errored (see above)" >&2
    return 1
  }
  echo "fetching wasm artifact → plum ..."
  mkdir -p "$root/.local/build/wasm"
  # Use the same fleet key as the ssh call above.
  if rsync -az -e "ssh -o BatchMode=yes -i $HOME/.ssh/id_mc_fleet" \
    "$host:~/$repo/.local/build/wasm/" "$root/.local/build/wasm/" 2>/dev/null; then
    echo "wasm → .local/build/wasm/"
  else
    echo "note: no wasm at .local/build/wasm/ on worker"
  fi
}
# ./run dist:render <res://scene.tscn> <out.png> [timeout_s]
# Render a proof scene on a worker (software weston + Mesa llvmpipe, no GPU) and
# pull the PNG back to plum. Replaces the apricot SCREENSHOT_HOST flow.
#
# Arguments: $1 - scene path, $2 - output png path, $3 - timeout (default 180)
# Delegates to tools/capture-proof.sh with AUTOPLAY_HOST and
# PROJECT_ROOT_REMOTE set for the first live worker.
# Returns:   capture-proof.sh's status; 1 on a usage error, no live worker,
#            or an inventory entry without a "<user>@" part.
cmd_dist_render() {
  local scene="${1:-}" out="${2:-}"
  if [ -z "$scene" ] || [ -z "$out" ]; then
    echo "usage: ./run dist:render <res://scene.tscn> <out.png> [timeout_s]" >&2
    return 1
  fi
  local host user
  host="$(_dist_first_host)" || { echo "no fleet — run ./run dist:up 1 first" >&2; return 1; }
  # The remote project path is built from the login user, so the entry must
  # name one; a bare address would produce /home/<ip>/….
  case "$host" in
    ?*@?*) user="${host%@*}" ;;
    *) echo "dist:render: inventory host '$host' is not <user>@<ip>" >&2; return 1 ;;
  esac
  AUTOPLAY_HOST="$host" \
    PROJECT_ROOT_REMOTE="/home/${user}/Code/@projects/@magic-civilization" \
    bash "$(_dist_repo_root)/tools/capture-proof.sh" "$scene" "$out" "${3:-180}"
}
# ── build-artifact Space (magicciv-artifacts on DO Spaces) ───────────────────
# Build once, publish the linux .so/wasm keyed by git sha; sim/test/AI runners
# fetch the prebuilt artifact instead of recompiling. Creds: ~/.vault/do-spaces-uvlava.*
# Name of the DO Space (bucket) that holds published builds and models.
_DIST_SPACE="magicciv-artifacts"
# Repo-relative path of the linux GDExtension library the workers load.
_DIST_SO_PATH="src/game/engine/addons/magic_civ_physics/libmagic_civ_physics.x86_64.so"
# Emit an `RCLONE_S3_* ...` env-prefix string (DO Spaces creds from the vault) for
# embedding in a remote ssh command. Empty (rc 1) if the keys are missing.
# Reads:   ~/.vault/do-spaces-uvlava.access and ~/.vault/do-spaces-uvlava.secret
# Outputs: the prefix on stdout, no trailing newline, values single-quoted
# Returns: 0 on success; 1 if either key is missing/empty or contains a single
#          quote (which would break out of the quoting on the remote side).
_dist_spaces_env() {
  local ak sk
  ak="$(cat ~/.vault/do-spaces-uvlava.access 2>/dev/null)"
  sk="$(cat ~/.vault/do-spaces-uvlava.secret 2>/dev/null)"
  if [ -z "$ak" ] || [ -z "$sk" ]; then return 1; fi
  case "$ak$sk" in
    *\'*) return 1 ;;
  esac
  printf "RCLONE_S3_PROVIDER=DigitalOcean RCLONE_S3_ENDPOINT=nyc3.digitaloceanspaces.com RCLONE_S3_ACCESS_KEY_ID='%s' RCLONE_S3_SECRET_ACCESS_KEY='%s'" "$ak" "$sk"
}
# ./run dist:publish
# On a worker: build gdext + wasm, upload to magicciv-artifacts/builds/<sha>/.
# Also builds the pure-Rust sim_scenario runner and uploads it under
# builds/<sha>/bin/ (best effort), then writes builds/<sha>/meta.txt.
# Returns: the remote script's status; 1 if there is no live worker or no
#          DO Spaces credentials.
cmd_dist_publish() {
  local host senv
  host="$(_dist_first_host)" || { echo "no fleet — ./run dist:up 1 first" >&2; return 1; }
  senv="$(_dist_spaces_env)" || { echo "no DO Spaces creds in ~/.vault/do-spaces-uvlava.*" >&2; return 1; }
  echo "building + publishing artifacts on $host ..."
  # Quoted heredoc: the script reaches the worker verbatim; SO_PATH and SPACE
  # are expanded there from the env prefix on the ssh command line.
  ssh -o BatchMode=yes -o StrictHostKeyChecking=accept-new -i "$HOME/.ssh/id_mc_fleet" "$host" \
    "$senv SO_PATH='$_DIST_SO_PATH' SPACE='$_DIST_SPACE' bash -s" <<'REMOTE'
set -e
cd ~/Code/@projects/@magic-civilization
SHA=$(git rev-parse HEAD)
. ~/.cargo/env
( cd src/simulator && bash build-gdext.sh && bash build-wasm.sh )
rclone copyto "$SO_PATH" ":s3:$SPACE/builds/$SHA/libmagic_civ_physics.x86_64.so"
if [ -d .local/build/wasm ]; then
  rclone copy .local/build/wasm ":s3:$SPACE/builds/$SHA/wasm/" || true
fi
# Build the pure-Rust sim scenario runner (for horizontal fleet simulation testing of declarative scenarios).
# Workers can fetch the prebuilt binary and run many scenario+seed instances in parallel without recompiles.
# Upload only after a successful build, so a stale binary left in target/ by
# an earlier commit is never published under this sha.
if ( cd src/simulator && cargo build --release -p mc-sim --bin sim_scenario ); then
  SIM_BIN="src/simulator/target/release/sim_scenario"
  if [ -x "$SIM_BIN" ]; then
    rclone copyto "$SIM_BIN" ":s3:$SPACE/builds/$SHA/bin/sim_scenario" || true
  fi
else
  echo "warning: sim_scenario build failed — binary not published for $SHA" >&2
fi
printf 'sha=%s\nbuilt=%s\n' "$SHA" "$(date -u +%FT%TZ)" | rclone rcat ":s3:$SPACE/builds/$SHA/meta.txt"
echo "published builds/$SHA/ (.so + wasm + sim_scenario for scenario tests)"
REMOTE
}
# ./run dist:fetch
# On a worker: fetch the prebuilt .so for the worker's HEAD sha into the addon
# path instead of recompiling. Nonzero on a cache miss.
# Returns: 0 when the .so was fetched; 3 (from the remote script) when rclone
#          could not copy it; 1 if there is no live worker or no credentials.
# Note: rclone's stderr is discarded, so any rclone failure is reported as MISS.
cmd_dist_fetch() {
  local host senv
  host="$(_dist_first_host)" || { echo "no fleet — ./run dist:up 1 first" >&2; return 1; }
  senv="$(_dist_spaces_env)" || { echo "no DO Spaces creds" >&2; return 1; }
  # Quoted heredoc: the script reaches the worker verbatim; SO_PATH and SPACE
  # are expanded there from the env prefix on the ssh command line.
  ssh -o BatchMode=yes -o StrictHostKeyChecking=accept-new -i "$HOME/.ssh/id_mc_fleet" "$host" \
    "$senv SO_PATH='$_DIST_SO_PATH' SPACE='$_DIST_SPACE' bash -s" <<'REMOTE'
set -e
cd ~/Code/@projects/@magic-civilization
SHA=$(git rev-parse HEAD)
if rclone copyto ":s3:$SPACE/builds/$SHA/libmagic_civ_physics.x86_64.so" "$SO_PATH" 2>/dev/null; then
  echo "FETCHED prebuilt .so for $SHA"
else
  echo "MISS: no prebuilt .so for $SHA — run ./run dist:publish"; exit 3
fi
REMOTE
}
# Share RL model artifacts via the Space (runs on plum; models are platform-independent).
#   ./run dist:models push <src-dir-or-file> <name>
#   ./run dist:models pull <name> <dest>
#   ./run dist:models ls
# Returns: rclone's status for push/pull; 0 for ls (a failed listing prints
#          "(empty)"); 1 on a usage error or missing DO Spaces credentials.
cmd_dist_models() {
  local sub="${1:-}" ak sk
  ak="$(cat ~/.vault/do-spaces-uvlava.access 2>/dev/null)"
  sk="$(cat ~/.vault/do-spaces-uvlava.secret 2>/dev/null)"
  if [ -z "$ak" ] || [ -z "$sk" ]; then
    echo "no DO Spaces creds in ~/.vault/do-spaces-uvlava.*" >&2
    return 1
  fi
  # local -x: visible to the rclone child process, dropped on return, so the
  # secret never stays exported in the shell that sourced this file.
  local -x RCLONE_S3_PROVIDER=DigitalOcean RCLONE_S3_ENDPOINT=nyc3.digitaloceanspaces.com
  local -x RCLONE_S3_ACCESS_KEY_ID="$ak" RCLONE_S3_SECRET_ACCESS_KEY="$sk"
  case "$sub" in
    push)
      if [ -z "${2:-}" ] || [ -z "${3:-}" ]; then
        echo "usage: ./run dist:models push <src> <name>" >&2
        return 1
      fi
      rclone copy "$2" ":s3:$_DIST_SPACE/models/$3/" -P
      ;;
    pull)
      if [ -z "${2:-}" ] || [ -z "${3:-}" ]; then
        echo "usage: ./run dist:models pull <name> <dest>" >&2
        return 1
      fi
      rclone copy ":s3:$_DIST_SPACE/models/$2/" "$3" -P
      ;;
    ls)
      rclone ls ":s3:$_DIST_SPACE/models/" 2>/dev/null || echo "(empty)"
      ;;
    *)
      echo "usage: ./run dist:models {push <src> <name>|pull <name> <dest>|ls}" >&2
      return 1
      ;;
  esac
}