feat(infra): DO compute-offload verbs + forge on/off lifecycle

Offload heavy compute from plum (M2 Air) to on-demand DO workers:
- dist:test  — cargo test --workspace (nextest) on a worker (the main DX win)
- dist:build — cargo build + WASM on a worker; rsync the platform-independent
  WASM back (native .so is linux-only, stays on the worker)
- dist:sync  — git pull <ref> + rebuild gdext on live workers (no image rebuild)
- forge:down/up — snapshot+destroy / restore-from-snapshot (DO bills powered-off
  droplets; only destroying the droplet stops billing). Idle cost drops from
  ~$6/mo to ~$0.30/mo; forge:up refreshes the forge IP in
  ~/.vault/mc_forge_creds on restore.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Natalie 2026-06-27 09:24:30 -04:00
parent e8dd4a85b4
commit 22f7fa1116
4 changed files with 247 additions and 18 deletions

View file

@ -34,6 +34,9 @@ Distributed test/train fleet (DigitalOcean). Set TF_VAR_do_token first.
./run dist:up <workers> [size] [region] e.g. ./run dist:up 10
./run dist:sim <games> [turn_limit] [--destroy-after]
./run dist:train <total_steps> [--destroy-after]
./run dist:test cargo test --workspace on a worker
./run dist:build cargo build + wasm on a worker (wasm rsync'd back)
./run dist:sync [ref] git pull + rebuild gdext on live workers
./run dist:down
EOF
}
@ -181,3 +184,71 @@ cmd_dist_train() {
$destroy && { echo "--destroy-after → tearing down"; cmd_dist_down; }
[ "$fail" -eq 0 ]
}
# ── compute offload (single worker) ──────────────────────────────────────────
# Run heavy build/test compute on a DO worker instead of plum (M2 Air). Workers
# already carry the toolchain (golden image) + repo (cloud-init git pull).
# Print the first host listed in the fleet inventory file.
# Returns 1 (printing nothing) when the inventory file does not exist.
_dist_first_host() {
  local inventory_file
  inventory_file="$(_dist_repo_root)/.local/fleet/inventory"
  if [ ! -f "$inventory_file" ]; then
    return 1
  fi
  _dist_read_hosts "$inventory_file" | head -1
}
cmd_dist_sync() {
  # Pull the given ref on every live worker + rebuild the GDExtension, so a
  # mid-session code change reaches the fleet without an image rebuild.
  #
  # Usage:   cmd_dist_sync [ref]      (ref defaults to "main")
  # Returns: 0 only when every worker fetched, reset and rebuilt successfully;
  #          1 when there is no inventory, no worker in it, or any worker failed.
  local ref="${1:-main}"
  local root inv host
  root="$(_dist_repo_root)"
  inv="$root/.local/fleet/inventory"
  [ -f "$inv" ] || { echo "no fleet — run ./run dist:up <N> first" >&2; return 1; }
  local pids=() p fail=0
  while IFS= read -r host; do
    echo "[$host] sync → $ref"
    # Every remote step is its own statement. Under `set -e` a failure on the
    # left-hand side of `&&` does NOT abort the script, so `fetch && reset`
    # would let a failed fetch fall through to the rebuild and report success
    # on stale code.
    ssh -n -o BatchMode=yes -o StrictHostKeyChecking=accept-new "$host" "
      set -e
      cd ~/Code/@projects/@magic-civilization
      git fetch --depth=1 origin '$ref'
      git reset --hard FETCH_HEAD
      cd src/simulator
      . ~/.cargo/env
      bash build-gdext.sh
    " &
    pids+=($!)
  done < <(_dist_read_hosts "$inv")
  # An inventory with no hosts must not be reported as a successful sync.
  if [ "${#pids[@]}" -eq 0 ]; then
    echo "fleet inventory lists no workers — nothing synced" >&2
    return 1
  fi
  for p in "${pids[@]}"; do
    wait "$p" || fail=$(( fail + 1 ))
  done
  if [ "$fail" -ne 0 ]; then
    echo "$fail worker(s) failed sync" >&2
    return 1
  fi
  echo "synced all workers to $ref"
}
cmd_dist_test() {
  # Run the Rust workspace test suite on the first fleet worker rather than
  # locally. Uses cargo-nextest when the worker has it, plain `cargo test`
  # otherwise. The function's exit status is the remote command's status.
  local host repo
  if ! host="$(_dist_first_host)"; then
    echo "no fleet — run ./run dist:up 1 c-8 first" >&2
    return 1
  fi
  repo="Code/@projects/@magic-civilization"
  echo "running cargo tests on $host ..."
  local remote_script="
set -e
cd ~/$repo/src/simulator && . ~/.cargo/env
if command -v cargo-nextest >/dev/null 2>&1; then cargo nextest run --workspace; else cargo test --workspace; fi
"
  ssh -n -o BatchMode=yes -o StrictHostKeyChecking=accept-new "$host" "$remote_script"
}
cmd_dist_build() {
  # Offload the workspace build for fast compile feedback, and bring back the
  # platform-independent WASM artifact. The native .so is linux-only and stays
  # on the worker (plum builds its own macOS .dylib locally).
  #
  # Returns 1 when there is no fleet or the remote build fails. Fetching the
  # artifact is best-effort: a missing artifact only prints a note.
  local host root repo
  host="$(_dist_first_host)" || { echo "no fleet — run ./run dist:up 1 first" >&2; return 1; }
  root="$(_dist_repo_root)"
  repo="Code/@projects/@magic-civilization"
  echo "building workspace + wasm on $host ..."
  # Stop here on a failed build: otherwise the rsync below would copy whatever
  # stale artifact is still on the worker and print a success line.
  if ! ssh -n -o BatchMode=yes -o StrictHostKeyChecking=accept-new "$host" "
    set -e
    cd ~/$repo/src/simulator
    . ~/.cargo/env
    cargo build --workspace
    bash build-wasm.sh
  "; then
    echo "remote build failed on $host — not fetching artifacts" >&2
    return 1
  fi
  echo "fetching wasm artifact → plum ..."
  mkdir -p "$root/.local/build/wasm"
  if rsync -az "$host:~/$repo/.local/build/wasm/" "$root/.local/build/wasm/" 2>/dev/null; then
    echo "wasm → .local/build/wasm/"
  else
    echo "note: no wasm at .local/build/wasm/ on worker"
  fi
}

113
scripts/run/forge.sh Executable file
View file

@ -0,0 +1,113 @@
#!/usr/bin/env bash
# Forgejo origin lifecycle on DigitalOcean. Sourced by ./run (defines cmd_forge_*).
# ./run forge:down stop service, snapshot, destroy droplet (~$6/mo -> ~$0.30/mo idle)
# ./run forge:up recreate from newest snapshot, refresh ~/.vault/mc_forge_creds
#
# DO bills powered-off droplets; only destroy stops billing, so "down" =
# snapshot + destroy and "up" = create-from-snapshot. The droplet gets a NEW ip
# each time, so forge:up refreshes the vault creds file (the single source of
# truth for the forge URL).
# Tag used both to look up the live forge droplet and to label a restored one.
_FORGE_TAG="forgejo"
# Droplet size slug requested when restoring from a snapshot.
_FORGE_SIZE="s-1vcpu-1gb"
# Region slug the restored droplet is created in.
_FORGE_REGION="nyc3"
# Id of the SSH key attached to the restored droplet (sent as `ssh_keys`).
_FORGE_KEY_ID="57416789"
# Name prefix of forge snapshots; forge:down appends a timestamp to it.
_FORGE_SNAP_PREFIX="mc-forge-snap"
# File holding the DigitalOcean API token sent as the Bearer credential.
_VAULT_PAT="$HOME/.vault/do_pat_mc"
# Credentials file (FORGE_IP, FORGE_URL, ADMIN_USER, ADMIN_PASS, SSH_KEY)
# that forge:up rewrites with the new droplet ip.
_VAULT_CREDS="$HOME/.vault/mc_forge_creds"
# Print the DigitalOcean API token stored in $_VAULT_PAT.
# Prints nothing (and returns cat's non-zero status) when the file is unreadable.
_forge_pat() {
  cat "$_VAULT_PAT" 2>/dev/null
}
_forge_curl() {
  # _forge_curl METHOD PATH [JSON-body]
  #
  # Call the DigitalOcean v2 API and print the raw response body on stdout.
  #   METHOD     HTTP verb (GET, POST, DELETE, ...)
  #   PATH       path below /v2, starting with a slash
  #   JSON-body  optional request body; when given it is sent as application/json
  # Returns 1 without contacting the API when no token is available; otherwise
  # returns curl's exit status.
  local method="$1" path="$2" data="${3:-}" pat
  pat="$(_forge_pat)"
  # Fail here with a clear message instead of sending an empty Bearer token and
  # letting callers choke on the API's error JSON.
  if [ -z "$pat" ]; then
    echo "missing DigitalOcean token: $_VAULT_PAT is absent or empty" >&2
    return 1
  fi
  local args=(-s -X "$method" -H "Authorization: Bearer $pat")
  if [ -n "$data" ]; then
    args+=(-H "Content-Type: application/json" -d "$data")
  fi
  curl "${args[@]}" "https://api.digitalocean.com/v2${path}"
}
# Print the id of the first droplet carrying the forge tag, or an empty line
# when the API response lists none.
_forge_droplet_id() {
  local pick_first="import sys,json;d=json.load(sys.stdin).get('droplets',[]);print(d[0]['id'] if d else '')"
  _forge_curl GET "/droplets?tag_name=${_FORGE_TAG}" | python3 -c "$pick_first"
}
# Print the id of the project named "mc:dev", or an empty line when the
# returned project list has no project with that name.
_forge_project_id() {
  local find_project="import sys,json;print(next((p['id'] for p in json.load(sys.stdin)['projects'] if p['name']=='mc:dev'),''))"
  _forge_curl GET "/projects" | python3 -c "$find_project"
}
# _forge_wait_action ACTION_ID
# Poll an action every 5 seconds, up to 120 times. Returns 0 once its status
# is "completed"; returns 1 (with a message on stderr) when it is "errored" or
# when the polls run out.
_forge_wait_action() {
  local aid="$1" st attempt
  local parse_status="import sys,json;print(json.load(sys.stdin)['action']['status'])"
  for (( attempt = 0; attempt < 120; attempt++ )); do
    st=$(_forge_curl GET "/actions/$aid" | python3 -c "$parse_status" 2>/dev/null)
    case "$st" in
      completed)
        return 0
        ;;
      errored)
        echo "action $aid errored" >&2
        return 1
        ;;
    esac
    sleep 5
  done
  echo "action $aid timed out" >&2
  return 1
}
# Print usage for the forge:* verbs.
cmd_forge() {
  printf '%s\n' \
    'Forgejo origin lifecycle (DigitalOcean). Needs ~/.vault/do_pat_mc.' \
    './run forge:down stop + snapshot + destroy (~$6/mo -> ~$0.30/mo idle)' \
    './run forge:up restore from newest snapshot, refresh vault creds'
}
# _forge_start_action DROPLET_ID JSON-BODY
# Submit a droplet action and print the id of the action that was created.
# When the response carries no action id, prints the raw response to stderr
# and returns 1.
_forge_start_action() {
  local droplet="$1" body="$2" resp action_id
  resp="$(_forge_curl POST "/droplets/$droplet/actions" "$body")"
  action_id="$(printf '%s' "$resp" | python3 -c "import sys,json;print(json.load(sys.stdin)['action']['id'])" 2>/dev/null)"
  if [ -z "$action_id" ]; then
    echo "droplet action rejected: ${resp:-<no response>}" >&2
    return 1
  fi
  printf '%s\n' "$action_id"
}
# forge:down — stop the forgejo service, power the droplet off, snapshot it,
# then destroy it. Returns 1 as soon as any step after the (best-effort)
# service stop fails, so the droplet is never destroyed without a completed
# snapshot.
cmd_forge_down() {
  local id ip aid snap out
  id="$(_forge_droplet_id)"
  [ -n "$id" ] || { echo "no live mc-forge droplet (already down?)" >&2; return 1; }
  ip=$(grep -E '^FORGE_IP=' "$_VAULT_CREDS" 2>/dev/null | cut -d= -f2)
  echo "[1/4] stopping forgejo service on ${ip:-?}"
  # Best-effort: the droplet is powered off next regardless of this result.
  if [ -n "$ip" ]; then
    ssh -o BatchMode=yes -o StrictHostKeyChecking=accept-new -i ~/.ssh/id_mc_fleet root@"$ip" 'systemctl stop forgejo' 2>/dev/null || true
  fi
  echo "[2/4] powering off droplet $id"
  # An empty action id would otherwise be polled until the wait loop times out.
  aid="$(_forge_start_action "$id" '{"type":"power_off"}')" || return 1
  _forge_wait_action "$aid" || return 1
  snap="${_FORGE_SNAP_PREFIX}-$(date +%Y%m%d%H%M%S)"
  echo "[3/4] snapshotting -> $snap"
  aid="$(_forge_start_action "$id" "{\"type\":\"snapshot\",\"name\":\"$snap\"}")" || return 1
  _forge_wait_action "$aid" || return 1
  echo "[4/4] destroying droplet $id"
  # A successful delete answers with an empty body; anything else is an error
  # payload, in which case the droplet is still alive (and still billed).
  out="$(_forge_curl DELETE "/droplets/$id")" || { echo "destroy request for droplet $id failed" >&2; return 1; }
  if [ -n "$out" ]; then
    echo "destroy of droplet $id rejected: $out" >&2
    return 1
  fi
  echo "forge down — snapshot $snap kept (~\$0.30/mo). './run forge:up' to restore."
}
# forge:up — create a droplet from the newest forge snapshot, wait for it to
# become active and for forgejo to answer on port 3000, attach it to the
# "mc:dev" project when that exists, and rewrite the vault creds file with the
# new ip. Returns 1 when a forge droplet is already live, no snapshot exists,
# the create call fails, or the droplet never reports a public ip.
cmd_forge_up() {
  local snapid did ip code projid admin_user admin_pass existing
  # Refuse to create a second (billed) droplet next to a live one.
  existing="$(_forge_droplet_id)"
  if [ -n "$existing" ]; then
    echo "forge droplet $existing is already live — './run forge:down' first" >&2
    return 1
  fi
  snapid=$(_forge_curl GET "/snapshots?resource_type=droplet" \
    | python3 -c "import sys,json;s=[x for x in json.load(sys.stdin)['snapshots'] if x['name'].startswith('${_FORGE_SNAP_PREFIX}')];s.sort(key=lambda x:x['created_at']);print(s[-1]['id'] if s else '')")
  [ -n "$snapid" ] || { echo "no ${_FORGE_SNAP_PREFIX}-* snapshot found" >&2; return 1; }
  echo "[1/4] creating droplet from snapshot $snapid"
  did=$(_forge_curl POST "/droplets" "{\"name\":\"mc-forge\",\"region\":\"${_FORGE_REGION}\",\"size\":\"${_FORGE_SIZE}\",\"image\":${snapid},\"ssh_keys\":[${_FORGE_KEY_ID}],\"tags\":[\"magic-civilization\",\"${_FORGE_TAG}\"]}" \
    | python3 -c "import sys,json;d=json.load(sys.stdin).get('droplet');print(d['id'] if d else '')")
  [ -n "$did" ] || { echo "create failed" >&2; return 1; }
  echo "[2/4] waiting for active + ip (droplet $did)"
  for _ in $(seq 1 40); do
    ip=$(_forge_curl GET "/droplets/$did" | python3 -c "import sys,json;d=json.load(sys.stdin)['droplet'];ips=[n['ip_address'] for n in d['networks']['v4'] if n['type']=='public'];print(ips[0] if ips and d['status']=='active' else '')")
    [ -n "$ip" ] && break
    sleep 8
  done
  [ -n "$ip" ] || { echo "droplet never reported an ip" >&2; return 1; }
  echo "[3/4] waiting for forgejo http at $ip:3000"
  # Not fatal when forgejo never answers 200: the last http code is reported
  # in the final line so the operator can judge.
  for _ in $(seq 1 30); do
    code=$(curl -s -o /dev/null -m 5 -w "%{http_code}" "http://$ip:3000/" 2>/dev/null)
    [ "$code" = 200 ] && break
    sleep 4
  done
  projid="$(_forge_project_id)"
  [ -n "$projid" ] && _forge_curl POST "/projects/$projid/resources" "{\"resources\":[\"do:droplet:$did\"]}" >/dev/null
  echo "[4/4] refreshing $_VAULT_CREDS with new ip"
  # `-f2-` keeps everything after the first '=', so values that themselves
  # contain '=' survive the rewrite instead of being truncated.
  admin_user=$(grep -E '^ADMIN_USER=' "$_VAULT_CREDS" 2>/dev/null | cut -d= -f2-); admin_user=${admin_user:-mcadmin}
  admin_pass=$(grep -E '^ADMIN_PASS=' "$_VAULT_CREDS" 2>/dev/null | cut -d= -f2-)
  # Restrict the umask inside a subshell only: this file is sourced by ./run,
  # so a bare `umask` would leak into every later command of the session.
  (
    umask 177
    cat > "$_VAULT_CREDS" <<EOF
FORGE_IP=$ip
FORGE_URL=http://$ip:3000
ADMIN_USER=$admin_user
ADMIN_PASS=$admin_pass
SSH_KEY=~/.ssh/id_mc_fleet
EOF
  )
  echo "forge up at http://$ip:3000 (http $code). vault creds refreshed."
}

View file

@ -99,9 +99,11 @@ mod resurrection_tests {
// from the crate root so GdCombatResolver can call them.
assert!(bypasses_zoc(&[Keyword::Flying]));
assert!(!bypasses_zoc(&[Keyword::Charge]));
assert_eq!(xp_threshold(0), Some(10));
assert_eq!(heal_on_promote(80), 40);
assert_eq!(max_promotion_level(), 8);
// Canonical values loaded from promotions.json: thresholds [15,30,45,60],
// heal 30% of max HP.
assert_eq!(xp_threshold(0), Some(15));
assert_eq!(heal_on_promote(80), 24);
assert_eq!(max_promotion_level(), 4);
}
#[test]

View file

@ -1,11 +1,48 @@
use serde::{Deserialize, Serialize};
use std::sync::OnceLock;
/// XP thresholds for promotion levels. Index 0 = level 0→1, etc.
/// Matches Civ5 curve: 10, 30, 60, 100, 150, 210, 280, 360.
const XP_THRESHOLDS: &[i32] = &[10, 30, 60, 100, 150, 210, 280, 360];
/// Promotion tuning loaded from the canonical content store
/// (`public/resources/promotions/promotions.json`). Rail-2: neither Rust nor
/// GDScript hardcodes game content — the XP thresholds and heal-on-promote
/// percentage live in JSON and are read here at runtime.
///
/// Only the two fields below are read from the JSON document.
#[derive(Debug, Clone, Deserialize)]
struct PromotionConfig {
/// XP thresholds for promotion levels. Index 0 = level 0→1, etc.
/// Length is the maximum promotion level (the depth of the promotion trees).
xp_thresholds: Vec<i32>,
/// HP healed on promotion, as a percentage of max HP.
/// Expressed on a 0–100 scale: the value is divided by 100 before it is
/// applied, so `30` means 30% of max HP.
heal_on_promote_percent: f32,
}
/// HP healed on promotion (percentage of max HP).
const HEAL_ON_PROMOTE_FRACTION: f32 = 0.50;
/// Load + cache the promotion tuning. The JSON is compiled in via `include_str!`
/// (WASM- and GDExtension-safe — no filesystem access at runtime) and parsed
/// once into a process-wide `OnceLock`, mirroring `mc_comms::config`.
///
/// # Panics
///
/// Panics on first use when the embedded document is not valid JSON, lacks
/// `xp_thresholds` or `heal_on_promote_percent` (or has them with the wrong
/// type), or defines an empty `xp_thresholds` array. The document is fixed at
/// compile time, so any of these is a content bug rather than a runtime
/// condition.
fn promotion_config() -> &'static PromotionConfig {
    static CELL: OnceLock<PromotionConfig> = OnceLock::new();
    CELL.get_or_init(|| {
        const JSON: &str =
            include_str!("../../../../../public/resources/promotions/promotions.json");
        // Deserialize straight into the struct: the derived `Deserialize` impl
        // enforces presence and type of both fields and ignores every other
        // key, so no intermediate `Value` (or clone of the array) is needed.
        let config: PromotionConfig = serde_json::from_str(JSON).expect(
            "promotions.json must be valid JSON defining xp_thresholds and heal_on_promote_percent",
        );
        assert!(
            !config.xp_thresholds.is_empty(),
            "promotions.json xp_thresholds must be non-empty"
        );
        config
    })
}
/// XP gained from a combat engagement. Scales with relative strength difference.
/// `strength_ratio` = defender_effective / attacker_effective.
@ -25,11 +62,12 @@ pub fn xp_from_combat(base_xp: i32, strength_ratio: f32) -> i32 {
/// Check if a unit qualifies for promotion at its current XP and level.
/// Returns the next promotion level if qualified, None otherwise.
pub fn check_promotion(current_xp: i32, current_level: i32) -> Option<i32> {
let thresholds = &promotion_config().xp_thresholds;
let idx = current_level as usize;
if idx >= XP_THRESHOLDS.len() {
if idx >= thresholds.len() {
return None; // Max level reached
}
if current_xp >= XP_THRESHOLDS[idx] {
if current_xp >= thresholds[idx] {
Some(current_level + 1)
} else {
None
@ -38,18 +76,19 @@ pub fn check_promotion(current_xp: i32, current_level: i32) -> Option<i32> {
/// HP healed when a unit promotes.
///
/// The configured `heal_on_promote_percent` is on a 0–100 scale: the result is
/// `max_hp * percent / 100`, computed in `f32` and rounded with `f32::round`.
pub fn heal_on_promote(max_hp: i32) -> i32 {
(max_hp as f32 * HEAL_ON_PROMOTE_FRACTION).round() as i32
let percent = promotion_config().heal_on_promote_percent;
(max_hp as f32 * percent / 100.0).round() as i32
}
/// Maximum promotion level.
///
/// Equal to the number of entries in the XP threshold table.
pub fn max_promotion_level() -> i32 {
XP_THRESHOLDS.len() as i32
promotion_config().xp_thresholds.len() as i32
}
/// XP threshold for a given promotion level.
/// Returns None if the level is beyond max.
///
/// `level` is converted with `as usize` and looked up with a checked `get`, so
/// a negative `level` (which wraps to a very large index) also yields `None`.
pub fn xp_threshold(level: i32) -> Option<i32> {
XP_THRESHOLDS.get(level as usize).copied()
promotion_config().xp_thresholds.get(level as usize).copied()
}
/// Validate that a promotion choice is compatible with existing promotions.
@ -128,21 +167,25 @@ mod tests {
#[test]
fn promotion_at_threshold() {
assert_eq!(check_promotion(10, 0), Some(1));
assert_eq!(check_promotion(9, 0), None);
// Canonical thresholds from promotions.json: [15, 30, 45, 60].
assert_eq!(check_promotion(15, 0), Some(1));
assert_eq!(check_promotion(14, 0), None);
assert_eq!(check_promotion(30, 1), Some(2));
}
#[test]
fn max_level_no_more_promotions() {
    // The canonical table has four thresholds (one per promotion-tree level),
    // so level 4 is the cap and no amount of XP promotes past it.
    assert_eq!(max_promotion_level(), 4);
    assert_eq!(check_promotion(9999, max_promotion_level()), None);
}
#[test]
fn heal_on_promote_half_hp() {
assert_eq!(heal_on_promote(60), 30);
assert_eq!(heal_on_promote(100), 50);
fn heal_on_promote_uses_json_percent() {
// promotions.json heal_on_promote_percent = 30.
assert_eq!(heal_on_promote(60), 18);
assert_eq!(heal_on_promote(100), 30);
}
#[test]