Ephemeral CPU Droplet fleet that horizontally scales the iteration loop:
- infra/terraform/test-fleet: cattle Droplets from a golden image (auto-discovered
by name via digitalocean_images), grouped under the mc:dev DO project, with a
mocked-provider test suite (no token/spend).
- infra/packer: golden-image builder reusing scripts/dev-setup/linux.sh.
- scripts/run/dist.sh: ./run dist:{check,up,sim,train,down} — shard sim/test
batches across workers via autoplay-batch AUTOPLAY_HOST+SEED_OFFSET.
GPU intentionally absent (workload is CPU-bound per docs/ai-production.md).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
57 lines
1.8 KiB
HCL
57 lines
1.8 KiB
HCL
# Distributed test/train fleet — disposable cattle from the Packer golden image.
# No persistent volume: workers are stateless. The golden image carries the warm
# clone + toolchain + prebuilt .so; results leave via the dispatch layer (scp).
|
|
|
|
# SSH key registered with DigitalOcean for the fleet. The public key is read
# from the local path in var.ssh_public_key_path (a leading "~" is expanded),
# and the key is named after the fleet: "<var.name>-key".
resource "digitalocean_ssh_key" "fleet" {
  public_key = file(pathexpand(var.ssh_public_key_path))
  name       = "${var.name}-key"
}
|
|
|
|
# Resolve the newest golden image by name substring. Skipped entirely when
# var.base_image is set (bootstrap path), so `terraform plan` works before any
# golden image exists.
#
# Results are sorted newest-first, so consumers take images[0]. The
# postcondition turns an empty result into an actionable error instead of the
# opaque "Invalid index" failure that indexing images[0] would otherwise raise.
data "digitalocean_images" "golden" {
  # One lookup when no explicit base image is given; none otherwise.
  count = var.base_image == "" ? 1 : 0

  # Keep only images whose name contains var.golden_name_match.
  filter {
    key      = "name"
    values   = [var.golden_name_match]
    match_by = "substring"
  }

  # Newest image first.
  sort {
    key       = "created"
    direction = "desc"
  }

  lifecycle {
    postcondition {
      condition     = length(self.images) > 0
      error_message = "No DigitalOcean image name contains var.golden_name_match. Build the golden image with Packer first, or set var.base_image to bootstrap from a stock image."
    }
  }
}
|
|
|
|
locals {
  # Image used by every worker. An explicit var.base_image wins; when it is
  # empty, fall back to the first result of the golden-image lookup (which is
  # sorted newest-first), with its id converted to a string.
  image = var.base_image == "" ? tostring(data.digitalocean_images.golden[0].images[0].id) : var.base_image
}
|
|
|
|
# Fleet workers: var.workers identical Droplets named "<var.name>-<index>",
# all booted from local.image with the fleet SSH key installed.
resource "digitalocean_droplet" "worker" {
  count = var.workers

  name     = "${var.name}-${count.index}"
  image    = local.image
  region   = var.region
  size     = var.size
  ssh_keys = [digitalocean_ssh_key.fleet.id]

  # cloud-init is deliberately thin: it copies the injected key to the build
  # user and fast-forwards the warm clone to the requested ref. Toolchain and
  # prebuilt GDExtension already live in the golden image, so nothing heavy
  # is installed at boot.
  user_data = templatefile("${path.module}/cloud-init.yaml", {
    remote_user = var.remote_user
    git_remote  = var.git_remote
    git_ref     = var.git_ref
  })
}
|
|
|
|
# Look up the existing DigitalOcean project whose name is var.do_project, so
# fleet workers can be grouped under it.
data "digitalocean_project" "mc" {
  name = var.do_project
}
|
|
|
|
# Attach every worker Droplet (by URN) to the looked-up project.
resource "digitalocean_project_resources" "fleet" {
  project   = data.digitalocean_project.mc.id
  resources = digitalocean_droplet.worker[*].urn
}
|