perf(infra): incremental golden-image rebuilds (layer on the last snapshot)
Packer base image is now a var; ./run dist:image builds FROM the newest mc-golden snapshot by default, so the idempotent provision.sh only redoes changed work (~3-8 min vs ~20 cold). --cold rebuilds from stock Ubuntu to reset layer cruft. Made the clone step idempotent (clone-or-fetch) so it works on a pre-provisioned base. Directly addresses 'avoid unnecessary rebuilds'. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
68099051b8
commit
d9588f8c80
3 changed files with 42 additions and 4 deletions
|
|
@ -53,6 +53,14 @@ variable "remote_user" {
|
|||
default = "mc"
|
||||
}
|
||||
|
||||
# Image the build droplet is created from.
#   * COLD build:        stock Ubuntu (the default below).
#   * INCREMENTAL build: the ID of a previous mc-golden snapshot. provision.sh
#     is idempotent, so it only redoes changed work (~3-8 min vs ~20).
# `./run dist:image` picks this automatically.
variable "base_image" {
  type        = string
  default     = "ubuntu-24-04-x64"
  description = "Base image for the build droplet: stock Ubuntu slug for a cold build, or a previous mc-golden snapshot ID for an incremental rebuild."
}
|
||||
|
||||
variable "fleet_pubkey" {
|
||||
type = string
|
||||
default = ""
|
||||
|
|
@ -68,7 +76,7 @@ source "digitalocean" "golden" {
|
|||
api_token = var.do_token
|
||||
region = var.region
|
||||
size = var.build_size
|
||||
image = "ubuntu-24-04-x64"
|
||||
image = var.base_image
|
||||
ssh_username = "root"
|
||||
snapshot_name = "mc-golden-${local.ts}"
|
||||
}
|
||||
|
|
|
|||
|
|
@ -65,10 +65,11 @@ as_user() {
|
|||
bash -lc "$1"
|
||||
}
|
||||
|
||||
echo "=== [3/7] clone repo @ \$HOME/$REPO_PATH (ref $GIT_REF) ==="
|
||||
echo "=== [3/7] clone-or-update repo @ \$HOME/$REPO_PATH (ref $GIT_REF) ==="
|
||||
# Idempotent: clone on a cold build, fetch+reset on an incremental rebuild (the
|
||||
# repo is already present when building FROM a previous golden snapshot).
|
||||
as_user "mkdir -p ~/$(dirname "$REPO_PATH")"
|
||||
as_user "git clone '$GIT_REMOTE' ~/$REPO_PATH"
|
||||
as_user "cd ~/$REPO_PATH && git checkout -f '$GIT_REF'"
|
||||
as_user "if [ -d ~/$REPO_PATH/.git ]; then cd ~/$REPO_PATH && git remote set-url origin '$GIT_REMOTE' && git fetch --depth=1 origin '$GIT_REF' && git reset --hard FETCH_HEAD; else git clone '$GIT_REMOTE' ~/$REPO_PATH && cd ~/$REPO_PATH && git checkout -f '$GIT_REF'; fi"
|
||||
|
||||
echo "=== [4/7] toolchain via scripts/dev-setup/linux.sh ==="
|
||||
# WITH_RUNNER must be defined: linux.sh references it unguarded under set -u and
|
||||
|
|
|
|||
|
|
@ -51,6 +51,7 @@ cmd_dist() {
|
|||
cat <<'EOF'
|
||||
Distributed test/train fleet (DigitalOcean). Set TF_VAR_do_token first.
|
||||
./run dist:check offline: fmt + validate + mocked test (no token/spend)
|
||||
./run dist:image [--cold] (re)build golden image — incremental by default (~3-8min vs ~20 cold)
|
||||
./run dist:up <workers> [size] [region] e.g. ./run dist:up 10
|
||||
./run dist:sim <games> [turn_limit] [--destroy-after]
|
||||
./run dist:train <total_steps> [--destroy-after]
|
||||
|
|
@ -79,6 +80,34 @@ cmd_dist_check() {
|
|||
echo "dist:check OK — config is valid, no resources touched."
|
||||
}
|
||||
|
||||
cmd_dist_image() {
|
||||
# (Re)build the golden image. INCREMENTAL by default: builds FROM the newest
|
||||
# mc-golden snapshot, so provision.sh (idempotent) only redoes changed work
|
||||
# (~3-8 min). --cold builds from stock Ubuntu (~20 min) — resets accumulated
|
||||
# layer cruft; run occasionally. Needs ~/.vault/{do_pat_mc,mc_forge_creds}.
|
||||
local cold=false a
|
||||
for a in "$@"; do [ "$a" = "--cold" ] && cold=true; done
|
||||
local root pat
|
||||
root="$(_dist_repo_root)"
|
||||
pat="$(cat ~/.vault/do_pat_mc 2>/dev/null)"
|
||||
[ -n "$pat" ] || { echo "no ~/.vault/do_pat_mc" >&2; return 1; }
|
||||
export DIGITALOCEAN_TOKEN="$pat"
|
||||
# shellcheck disable=SC1090
|
||||
. ~/.vault/mc_forge_creds
|
||||
export PKR_VAR_git_remote="http://${ADMIN_USER}:${ADMIN_PASS}@${FORGE_IP}:3000/mcadmin/magicciv.git"
|
||||
PKR_VAR_fleet_pubkey="$(cat ~/.ssh/id_mc_fleet.pub)"; export PKR_VAR_fleet_pubkey
|
||||
local base="ubuntu-24-04-x64" prev
|
||||
if ! $cold; then
|
||||
prev="$(curl -s -H "Authorization: Bearer $pat" "https://api.digitalocean.com/v2/snapshots?resource_type=droplet&per_page=200" \
|
||||
| python3 -c "import sys,json;s=[x for x in json.load(sys.stdin)['snapshots'] if x['name'].startswith('mc-golden')];s.sort(key=lambda x:x['created_at']);print(s[-1]['id'] if s else '')" 2>/dev/null)"
|
||||
if [ -n "$prev" ]; then base="$prev"; echo "INCREMENTAL rebuild from snapshot $base (pass --cold for a full rebuild)"; else echo "no prior golden — cold build"; fi
|
||||
else
|
||||
echo "COLD rebuild from $base"
|
||||
fi
|
||||
export PKR_VAR_base_image="$base"
|
||||
( cd "$root/infra/packer" && packer init golden-image.pkr.hcl >/dev/null && packer build golden-image.pkr.hcl )
|
||||
}
|
||||
|
||||
cmd_dist_up() {
|
||||
local n="${1:-}"
|
||||
[[ "$n" =~ ^[0-9]+$ ]] || { echo "usage: ./run dist:up <workers> [size] [region]" >&2; return 1; }
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue