diff --git a/infra/packer/golden-image.pkr.hcl b/infra/packer/golden-image.pkr.hcl
index de7a57ca..7e35907f 100644
--- a/infra/packer/golden-image.pkr.hcl
+++ b/infra/packer/golden-image.pkr.hcl
@@ -53,6 +53,14 @@ variable "remote_user" {
   default = "mc"
 }
 
+variable "base_image" {
+  type    = string
+  default = "ubuntu-24-04-x64"
+  # Stock Ubuntu for a COLD build, or a previous mc-golden snapshot ID for an
+  # INCREMENTAL rebuild — provision.sh is idempotent, so it only redoes changed
+  # work (~3-8 min vs ~20). `./run dist:image` picks this automatically.
+}
+
 variable "fleet_pubkey" {
   type    = string
   default = ""
@@ -68,7 +76,7 @@ source "digitalocean" "golden" {
   api_token     = var.do_token
   region        = var.region
   size          = var.build_size
-  image         = "ubuntu-24-04-x64"
+  image         = var.base_image
   ssh_username  = "root"
   snapshot_name = "mc-golden-${local.ts}"
 }
diff --git a/infra/packer/provision.sh b/infra/packer/provision.sh
index a4295749..59e21c5e 100755
--- a/infra/packer/provision.sh
+++ b/infra/packer/provision.sh
@@ -65,10 +65,11 @@ as_user() {
     bash -lc "$1"
 }
 
-echo "=== [3/7] clone repo @ \$HOME/$REPO_PATH (ref $GIT_REF) ==="
+echo "=== [3/7] clone-or-update repo @ \$HOME/$REPO_PATH (ref $GIT_REF) ==="
+# Idempotent: clone on a cold build, fetch+reset on an incremental rebuild (the
+# repo is already present when building FROM a previous golden snapshot).
 as_user "mkdir -p ~/$(dirname "$REPO_PATH")"
-as_user "git clone '$GIT_REMOTE' ~/$REPO_PATH"
-as_user "cd ~/$REPO_PATH && git checkout -f '$GIT_REF'"
+as_user "if [ -d ~/$REPO_PATH/.git ]; then cd ~/$REPO_PATH && git remote set-url origin '$GIT_REMOTE' && git fetch --depth=1 origin '$GIT_REF' && git reset --hard FETCH_HEAD; else git clone '$GIT_REMOTE' ~/$REPO_PATH && cd ~/$REPO_PATH && git checkout -f '$GIT_REF'; fi"
 
 echo "=== [4/7] toolchain via scripts/dev-setup/linux.sh ==="
 # WITH_RUNNER must be defined: linux.sh references it unguarded under set -u and
diff --git a/scripts/run/dist.sh b/scripts/run/dist.sh
index 967e5ca3..78b16b17 100755
--- a/scripts/run/dist.sh
+++ b/scripts/run/dist.sh
@@ -51,6 +51,7 @@ cmd_dist() {
   cat <<'EOF'
 Distributed test/train fleet (DigitalOcean). Set TF_VAR_do_token first.
   ./run dist:check                 offline: fmt + validate + mocked test (no token/spend)
+  ./run dist:image [--cold]        (re)build golden image — incremental by default (~3-8min vs ~20 cold)
   ./run dist:up [size] [region]    e.g. ./run dist:up 10
   ./run dist:sim [turn_limit] [--destroy-after]
   ./run dist:train [--destroy-after]
@@ -79,6 +80,46 @@ cmd_dist_check() {
   echo "dist:check OK — config is valid, no resources touched."
 }
 
+cmd_dist_image() {
+  # (Re)build the golden image. INCREMENTAL by default: builds FROM the newest
+  # mc-golden snapshot, so provision.sh (idempotent) only redoes changed work
+  # (~3-8 min). --cold builds from stock Ubuntu (~20 min) — resets accumulated
+  # layer cruft; run occasionally. Needs ~/.vault/{do_pat_mc,mc_forge_creds}.
+  # Returns non-zero if a credential is missing, the snapshot lookup fails, or
+  # packer fails.
+  local cold=false a
+  for a in "$@"; do [ "$a" = "--cold" ] && cold=true; done
+  local root pat
+  root="$(_dist_repo_root)"
+  pat="$(cat ~/.vault/do_pat_mc 2>/dev/null)"
+  [ -n "$pat" ] || { echo "no ~/.vault/do_pat_mc" >&2; return 1; }
+  export DIGITALOCEAN_TOKEN="$pat"
+  [ -r ~/.vault/mc_forge_creds ] || { echo "no ~/.vault/mc_forge_creds" >&2; return 1; }
+  # shellcheck disable=SC1090
+  . ~/.vault/mc_forge_creds
+  # Stop here rather than hand packer a clone URL with empty user/pass/host.
+  if [ -z "${ADMIN_USER:-}" ] || [ -z "${ADMIN_PASS:-}" ] || [ -z "${FORGE_IP:-}" ]; then
+    echo "ADMIN_USER, ADMIN_PASS and FORGE_IP must all be set (expected from ~/.vault/mc_forge_creds)" >&2
+    return 1
+  fi
+  export PKR_VAR_git_remote="http://${ADMIN_USER}:${ADMIN_PASS}@${FORGE_IP}:3000/mcadmin/magicciv.git"
+  PKR_VAR_fleet_pubkey="$(cat ~/.ssh/id_mc_fleet.pub)"; export PKR_VAR_fleet_pubkey
+  local base="ubuntu-24-04-x64" prev
+  if ! $cold; then
+    # A failed lookup (network, bad token, unexpected JSON) must not be mistaken
+    # for "no snapshot yet": abort instead of silently starting a cold build.
+    # curl -f turns HTTP errors into a failure with no body, so python exits non-zero.
+    prev="$(curl -fsS -H "Authorization: Bearer $pat" "https://api.digitalocean.com/v2/snapshots?resource_type=droplet&per_page=200" \
+      | python3 -c "import sys,json;s=[x for x in json.load(sys.stdin)['snapshots'] if x['name'].startswith('mc-golden')];s.sort(key=lambda x:x['created_at']);print(s[-1]['id'] if s else '')" 2>/dev/null)" \
+      || { echo "snapshot lookup failed (pass --cold to skip it)" >&2; return 1; }
+    if [ -n "$prev" ]; then base="$prev"; echo "INCREMENTAL rebuild from snapshot $base (pass --cold for a full rebuild)"; else echo "no prior golden — cold build"; fi
+  else
+    echo "COLD rebuild from $base"
+  fi
+  export PKR_VAR_base_image="$base"
+  ( cd "$root/infra/packer" && packer init golden-image.pkr.hcl >/dev/null && packer build golden-image.pkr.hcl )
+}
+
 cmd_dist_up() {
   local n="${1:-}"
   [[ "$n" =~ ^[0-9]+$ ]] || { echo "usage: ./run dist:up [size] [region]" >&2; return 1; }