Some checks are pending
ci / regression gate (push) Waiting to run
Packer destroys its build droplet on a clean finish, but a killed/slept/network-dropped run leaves the s-8vcpu-16gb-amd builder alive (~$192/mo). This happened once already (.project/handoffs/20260629_packer-cross-account-leak.md). One reaper script plus two defense layers: - scripts/cull-orphan-builders.sh reaps leftover builders by name prefix (mc-packer-* / legacy packer-*) with a size guard and an optional age guard; it pins the MC token via --access-token. - cloud-bringup.sh calls it in its EXIT trap, so a failed/Ctrl-C'd build reaps its own builder. - infra/launchd/com.uvlava.mc.cull-builders.plist sweeps every 30m with --min-age-min 90 to catch SIGKILL/power-loss cases no trap can. golden-image.pkr.hcl names the builder mc-packer-<ts> for deterministic matching. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
102 lines
4.1 KiB
Bash
Executable file
102 lines
4.1 KiB
Bash
Executable file
#!/usr/bin/env bash
# Cull orphaned Packer build droplets ("zombies") from the MC DigitalOcean account.
#
# Packer destroys its build droplet on a clean finish. An interrupted or failed run
# (SIGKILL, laptop sleep, network drop) can leave the s-8vcpu-16gb-amd builder alive —
# ~$192/mo bleeding silently. See .project/handoffs/20260629_packer-cross-account-leak.md.
#
# Two ways this runs:
#   * Automatically — cloud-bringup.sh calls it in its EXIT trap after every build,
#     so a failed/Ctrl-C'd run reaps its own builder.
#   * Periodically  — from a launchd/cron timer, to catch hard-kill cases the trap
#     can't (SIGKILL/power loss). Use --min-age-min so it never races a live build.
#
# Selector = droplet NAME prefix (never matches a real service droplet). The packer
# source names its builder "mc-packer-<ts>"; we also match the legacy default
# "packer-<uuid>" so pre-existing zombies are reaped. Size is a defense-in-depth guard.
#
# Usage:
#   scripts/cull-orphan-builders.sh                    # reap every leftover builder now
#   scripts/cull-orphan-builders.sh --min-age-min 90   # only reap builders >90 min old (cron-safe)
#   scripts/cull-orphan-builders.sh --dry-run          # list what would be reaped, delete nothing
#
# Environment:
#   MC_DO_TOKEN_FILE   file holding the DigitalOcean API token
#                      (default: $HOME/.vault/do_pat_mc)
#   MC_BUILD_SIZE      size slug a droplet must have to be reaped
#                      (default: s-8vcpu-16gb-amd)
#
# Exit status:
#   0  nothing to reap, --dry-run finished, or the deletion succeeded
#   1  the token file is missing or unreadable
#   2  bad command line (e.g. unknown argument)
#   Any other failing command aborts the script with that command's status (set -e).
set -euo pipefail

# ---- Command-line parsing ------------------------------------------------------
# MIN_AGE_MIN  minimum droplet age, in minutes, before it may be reaped (0 = any age)
# DRY_RUN      1 = only list what would be reaped, delete nothing
MIN_AGE_MIN=0
DRY_RUN=0

#######################################
# Parse the script's options into the globals above.
# Globals:   MIN_AGE_MIN (written), DRY_RUN (written)
# Arguments: the script's command line ("$@")
# Outputs:   --help text on stdout; diagnostics on stderr
# Returns:   exits 0 after --help, 2 on an unknown argument or a --min-age-min
#            value that is not a plain non-negative number
#######################################
parse_args() {
  # Kept in a variable so the regex behaves the same on every bash version.
  local number_re='^[0-9]+([.][0-9]+)?$'

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --min-age-min)
        MIN_AGE_MIN="${2:?--min-age-min needs a value}"
        # Validate here: the value is only consumed later by the python filter,
        # where a bad number would surface as an unrelated-looking failure.
        if ! [[ "$MIN_AGE_MIN" =~ $number_re ]]; then
          echo "cull-orphan-builders: --min-age-min must be a non-negative number of minutes, got '$MIN_AGE_MIN'" >&2
          exit 2
        fi
        shift 2
        ;;
      --dry-run)
        DRY_RUN=1
        shift
        ;;
      -h|--help)
        # Print only the leading comment block (minus the shebang), not every
        # comment line in the file.
        awk 'NR == 1 && /^#!/ { next }
             /^#/ { sub(/^#+ ?/, ""); print; next }
             { exit }' "$0"
        exit 0
        ;;
      *)
        echo "cull-orphan-builders: unknown arg '$1'" >&2
        exit 2
        ;;
    esac
  done
}

parse_args "$@"

# ---- Credentials -----------------------------------------------------------------
# The DigitalOcean token is read from a file (MC_DO_TOKEN_FILE overrides the default
# path) and exported as DIGITALOCEAN_ACCESS_TOKEN for the rest of the script.
TOKEN_FILE="${MC_DO_TOKEN_FILE:-$HOME/.vault/do_pat_mc}"
[[ -r "$TOKEN_FILE" ]] || { echo "!!! no DO token at $TOKEN_FILE" >&2; exit 1; }

DIGITALOCEAN_ACCESS_TOKEN="$(<"$TOKEN_FILE")"
# Drop stray whitespace (a trailing CR from a CRLF-saved file, accidental spaces).
DIGITALOCEAN_ACCESS_TOKEN="${DIGITALOCEAN_ACCESS_TOKEN//[[:space:]]/}"

# An empty token must never reach doctl.
# NOTE(review): with an empty --access-token doctl may fall back to its default
# auth context, i.e. possibly another account — confirm against the doctl docs.
[[ -n "$DIGITALOCEAN_ACCESS_TOKEN" ]] \
  || { echo "!!! DO token file $TOKEN_FILE is empty" >&2; exit 1; }
export DIGITALOCEAN_ACCESS_TOKEN

# ---- Selection rules -------------------------------------------------------------
# Only droplets of this size are reaped (MC_BUILD_SIZE overrides the default).
BUILD_SIZE="${MC_BUILD_SIZE:-s-8vcpu-16gb-amd}"

# Python source of the droplet filter, run as: python3 -c "$builder_filter"
#   stdin   JSON array of droplets (output of `doctl compute droplet list -o json`)
#   env     MIN_AGE_MIN (minutes), BUILD_SIZE (size slug)
#   stdout  one "id<TAB>name<TAB>size<TAB>age_min" row per droplet to reap;
#           age_min is "?" when the creation time could not be determined
#   stderr  "SKIP-SIZE<TAB>..." for a name match of another size (never reaped),
#           "UNKNOWN-AGE<TAB>..." for a name match without a usable created_at
#
# The name regex is anchored to the builder prefixes (mc-packer-, legacy packer-),
# so it never matches a real service droplet (com.uvlava.*, ct-forge-*, etc.).
# Age is computed in python (portable RFC3339 parse; macOS `date` can't do it cleanly).
#
# NB: the source sits in a single-quoted shell string, so it must not contain a
# single quote anywhere (comments included).
builder_filter='
import json, os, re, sys
from datetime import datetime, timezone

min_age = float(os.environ["MIN_AGE_MIN"])
build_size = os.environ["BUILD_SIZE"]
rx = re.compile(r"^(mc-packer-|packer-)")
now = datetime.now(timezone.utc)


def age_minutes(created):
    """Return the age in minutes, or None when created_at is missing or unparseable."""
    try:
        ts = datetime.fromisoformat(created.replace("Z", "+00:00"))
    except (AttributeError, TypeError, ValueError):
        return None
    if ts.tzinfo is None:
        # Naive timestamp: assume UTC so the subtraction below cannot raise.
        ts = ts.replace(tzinfo=timezone.utc)
    return (now - ts).total_seconds() / 60.0


for d in json.load(sys.stdin) or []:
    name = d.get("name") or ""
    if not rx.match(name):
        continue
    size = d.get("size_slug", "?")
    did = d.get("id", "?")
    age_min = age_minutes(d.get("created_at"))
    if age_min is None:
        # Unknown age -> treat as old enough to reap (a zombie must not survive
        # the periodic sweep just because its timestamp is unreadable), but say so.
        print(f"UNKNOWN-AGE\t{did}\t{name}\t{size}", file=sys.stderr)
        age_txt = "?"
    elif age_min < min_age:
        continue
    else:
        age_txt = f"{age_min:.0f}"
    # Defense-in-depth: only reap the known builder size. A differently-sized
    # "packer-*" droplet is unexpected; surface it instead of nuking it.
    if size != build_size:
        print(f"SKIP-SIZE\t{did}\t{name}\t{size}\t{age_txt}", file=sys.stderr)
        continue
    print(f"{did}\t{name}\t{size}\t{age_txt}")
'

# ---- Find the orphans --------------------------------------------------------------
# --access-token pins the MC token explicitly instead of relying on whatever
# doctl's default context happens to hold.
droplets_json="$(doctl compute droplet list -o json --access-token "$DIGITALOCEAN_ACCESS_TOKEN")" \
  || { echo "!!! cull-orphan-builders: 'doctl compute droplet list' failed" >&2; exit 1; }

# Run the filter inside a command substitution, not a process substitution: the
# exit status of a process substitution is discarded, so a crashed filter would
# look exactly like "no orphans found" and the script would exit 0.
victim_rows="$(
  printf '%s' "$droplets_json" \
    | MIN_AGE_MIN="$MIN_AGE_MIN" BUILD_SIZE="$BUILD_SIZE" python3 -c "$builder_filter"
)" || { echo "!!! cull-orphan-builders: could not filter the droplet list" >&2; exit 1; }

# Split the rows into an array without mapfile (a bash >= 4 builtin), so the
# script also runs under the bash 3.2 that macOS ships as /bin/bash.
victims=()
while IFS= read -r row; do
  if [[ -n "$row" ]]; then
    victims+=("$row")
  fi
done <<<"$victim_rows"

# ---- Report and reap ---------------------------------------------------------------
if [[ ${#victims[@]} -eq 0 ]]; then
  echo "cull-orphan-builders: no orphaned packer builders found (min-age ${MIN_AGE_MIN}m)."
  exit 0
fi

# Each row is "id<TAB>name<TAB>size<TAB>age_min".
ids=()
for row in "${victims[@]}"; do
  IFS=$'\t' read -r id name size age <<<"$row"
  # doctl's delete accepts droplet names as well as ids; only ever hand it a
  # numeric id so a malformed row can never select a droplet by name.
  if ! [[ "$id" =~ ^[0-9]+$ ]]; then
    echo "cull-orphan-builders: skipping '$name': non-numeric droplet id '$id'" >&2
    continue
  fi
  echo " orphan: $id $name $size ~${age}m old"
  ids+=("$id")
done

if [[ ${#ids[@]} -eq 0 ]]; then
  echo "cull-orphan-builders: matched builders but none had a usable id; nothing deleted." >&2
  exit 1
fi

if [[ $DRY_RUN -eq 1 ]]; then
  echo "cull-orphan-builders: --dry-run, deleting nothing (${#ids[@]} would be culled)."
  exit 0
fi

echo "cull-orphan-builders: deleting ${#ids[@]} orphaned builder(s) ..."
# The destructive call is pinned to the MC token the same way the listing is.
doctl compute droplet delete "${ids[@]}" --force --access-token "$DIGITALOCEAN_ACCESS_TOKEN"
echo "cull-orphan-builders: done."