scripts(ai-docker): 🔨 Update Docker configuration for AI simulator with GPU support, environment variables, and runtime optimizations
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
97a5b9391f
commit
a92e25e064
1 changed files with 139 additions and 0 deletions
139
scripts/mc-ai-docker.sh
Executable file
139
scripts/mc-ai-docker.sh
Executable file
|
|
@ -0,0 +1,139 @@
|
|||
#!/usr/bin/env bash
|
||||
# mc-ai-docker.sh — run an mc-ai autoplay batch inside a freeze-proof Docker
|
||||
# container. Mirrors scripts/godot-docker.sh, applied to the simulator side.
|
||||
#
|
||||
# Why: bare `systemd-run --user` could not bound the host blast radius — a
# runaway autoplay batch wedged apricot while concurrent mc-claude sessions
# were running. Docker's cgroup caps (--cpus / --memory / --memory-swap /
# --pids-limit) provide a hard ceiling the container cannot exceed, regardless
# of how many seeds the batch spawns.
|
||||
#
|
||||
# Usage:
|
||||
# scripts/mc-ai-docker.sh godot --version
|
||||
# scripts/mc-ai-docker.sh bash tools/autoplay-batch.sh 1 50 /work/.local/out
|
||||
#
|
||||
# Environment:
|
||||
# MC_AI_IMAGE_TAG image tag to run (default: mc-ai:latest). apricot-run.sh
|
||||
# launcher.sh sets this to mc-ai:<short-sha> so the image
|
||||
# matches the worktree SHA being driven.
|
||||
# MC_AI_CPUS cgroup cpu cap (default: 4)
|
||||
# MC_AI_MEMORY cgroup memory cap (default: 6g) — swap is pinned equal
|
||||
# so the container cannot escape via swap.
|
||||
# MC_AI_PIDS cgroup pid cap (default: 256)
|
||||
# MC_AI_WORKTREE host worktree to bind-mount at /work (default: repo root
|
||||
# containing this script). apricot launcher sets it to
|
||||
# the per-run scratch worktree path.
|
||||
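# MC_AI_OUTPUT_DIR optional host output dir; if set, it is created when
# missing and bind-mounted at /work/.local/out. This script does not change
# its ownership or mode, so it must already be writable by the container
# user (uid 1000).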
|
||||
# MC_AI_CONTAINER_NAME optional `docker run --name` value (lets the caller
|
||||
# trap and docker-kill on exit).
|
||||
# MC_AI_EXTRA_ENV space-separated VAR=value pairs forwarded to the
|
||||
# container via `-e`. Use this to pass through
|
||||
# AUTOPLAY_GODOT_BIN, AI_USE_MCTS, PARALLEL, etc.
|
||||
|
||||
# Strict mode: abort on errors, on unset variables, and on failures anywhere
# in a pipeline.
set -euo pipefail

# Resolve absolute paths from this script's own location so the script works
# regardless of the caller's current directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
DOCKERFILE="$REPO_ROOT/tools/docker/Dockerfile.mc-ai"

# Image to run and host directory to bind-mount at /work; both overridable
# through the environment.
IMAGE="${MC_AI_IMAGE_TAG:-mc-ai:latest}"
WORKTREE="${MC_AI_WORKTREE:-$REPO_ROOT}"

# cgroup caps passed to `docker run`.
CPUS="${MC_AI_CPUS:-4}"
MEMORY="${MC_AI_MEMORY:-6g}"
PIDS_LIMIT="${MC_AI_PIDS:-256}"

# None of these are reassigned later; freeze them so an accidental write fails
# loudly. Done as a separate statement so a failing command substitution above
# is not masked by the `readonly` builtin's own exit status.
readonly SCRIPT_DIR REPO_ROOT DOCKERFILE IMAGE WORKTREE CPUS MEMORY PIDS_LIMIT
|
||||
|
||||
# Abort early unless Docker is usable.
#
# Checks, in order, that a `docker` command can be resolved and that
# `docker info` succeeds (i.e. the daemon answers). The two failures get
# separate messages so a missing CLI is not misreported as a stopped daemon.
#
# Outputs: an error message on stderr when a check fails.
# Exits:   2 when either check fails; returns 0 otherwise.
ensure_docker_running() {
  if ! command -v docker >/dev/null 2>&1; then
    echo "ERROR: docker CLI not found on PATH. Install Docker before invoking mc-ai-docker.sh." >&2
    exit 2
  fi
  if ! docker info >/dev/null 2>&1; then
    echo "ERROR: Docker daemon not reachable. Start dockerd before invoking mc-ai-docker.sh." >&2
    exit 2
  fi
}
|
||||
|
||||
# Make sure the image named by $IMAGE exists locally, building it if allowed.
#
# Globals (read): IMAGE, DOCKERFILE, WORKTREE
# Outputs: progress and error messages on stderr.
# Returns: 0 when the image already exists; otherwise the status of
#          `docker build`.
# Exits:   2 when the image is missing and is not the auto-buildable tag.
ensure_image() {
  if docker image inspect "$IMAGE" >/dev/null 2>&1; then
    return 0
  fi

  # `latest` is the only tag we auto-build — SHA-pinned tags must be built
  # explicitly by the caller (apricot-run.sh launcher does this). Failing
  # loudly here avoids silently building stale `latest` when the caller
  # forgot to pre-build the SHA tag.
  #
  # A bare "mc-ai" reference is accepted too: Docker resolves an untagged
  # reference to :latest, so it names the same auto-buildable image.
  if [[ "$IMAGE" != "mc-ai:latest" && "$IMAGE" != "mc-ai" ]]; then
    echo "ERROR: image $IMAGE not found and is not the auto-buildable mc-ai:latest tag." >&2
    echo " Build it first with:" >&2
    # %q shell-quotes each value so the hint stays copy-pasteable even when a
    # path contains spaces or other special characters.
    printf ' docker build --tag %q --file %q %q\n' \
      "$IMAGE" "$DOCKERFILE" "$WORKTREE" >&2
    exit 2
  fi

  echo "[mc-ai-docker] image $IMAGE not found — building from $WORKTREE..." >&2
  DOCKER_BUILDKIT=1 docker build \
    --tag "$IMAGE" \
    --file "$DOCKERFILE" \
    "$WORKTREE"
}
|
||||
|
||||
# Create the host-side output directory (and any missing parents) when
# MC_AI_OUTPUT_DIR is set to a non-empty value; otherwise do nothing.
#
# Returns: 0 when MC_AI_OUTPUT_DIR is unset/empty, else the status of mkdir.
#
# NOTE(review): this only creates the directory — its ownership and mode are
# left as mkdir produced them. Confirm that is sufficient for the container
# user to write into it.
ensure_output_dir() {
  [[ -n "${MC_AI_OUTPUT_DIR:-}" ]] || return 0
  mkdir -p "$MC_AI_OUTPUT_DIR"
}
|
||||
|
||||
# Run the given command inside the mc-ai container under the configured caps.
#
# Globals (read): IMAGE, WORKTREE, CPUS, MEMORY, PIDS_LIMIT
# Environment (optional):
#   MC_AI_CONTAINER_NAME  passed to `docker run --name`
#   MC_AI_OUTPUT_DIR      bind-mounted at /work/.local/out
#   MC_AI_EXTRA_ENV       whitespace-separated VAR=value tokens, each
#                         forwarded with `-e`
# Arguments: the command (and its arguments) to execute in the container.
# Returns:   the exit status of `docker run`.
# Exits:     64 when no command is given.
main() {
  if [[ $# -eq 0 ]]; then
    echo "usage: mc-ai-docker.sh <command> [args...]" >&2
    exit 64
  fi

  ensure_docker_running
  ensure_image
  ensure_output_dir

  # All flags are accumulated in one array that is never empty. Expanding an
  # empty array under `set -u` aborts with "unbound variable" on Bash older
  # than 4.4, which separate optional arrays would trigger.
  local -a run_args=( --rm -i )

  # Allocate a TTY only when stdin AND stdout are terminals: with -i, docker
  # refuses -t ("the input device is not a TTY") if stdin is redirected.
  if [[ -t 0 && -t 1 ]]; then
    run_args+=( -t )
  fi

  # --init reaps zombies (Godot is signal-noisy under headless timeouts).
  # --cap-drop=ALL + --security-opt=no-new-privileges is the no-privileged
  # posture the brief asked for.
  # --memory-swap equals --memory so the container gets no extra swap.
  run_args+=(
    --init
    --cap-drop=ALL
    --security-opt no-new-privileges
    --cpus "$CPUS"
    --memory "$MEMORY"
    --memory-swap "$MEMORY"
    --pids-limit "$PIDS_LIMIT"
  )

  if [[ -n "${MC_AI_CONTAINER_NAME:-}" ]]; then
    run_args+=( --name "$MC_AI_CONTAINER_NAME" )
  fi

  run_args+=( --volume "$WORKTREE:/work" )

  if [[ -n "${MC_AI_OUTPUT_DIR:-}" ]]; then
    run_args+=( --volume "$MC_AI_OUTPUT_DIR:/work/.local/out" )
  fi

  local -a env_pairs=()
  local kv
  if [[ -n "${MC_AI_EXTRA_ENV:-}" ]]; then
    # Split on whitespace into tokens WITHOUT pathname expansion, so a value
    # such as PATTERN=* is forwarded literally. `read -d ''` consumes the
    # whole string and reports EOF as status 1, hence the `|| true`.
    IFS=$' \t\n' read -r -d '' -a env_pairs <<<"$MC_AI_EXTRA_ENV" || true
    # The ${arr[@]+...} form keeps a whitespace-only value (empty array) from
    # tripping `set -u` on old Bash.
    for kv in ${env_pairs[@]+"${env_pairs[@]}"}; do
      run_args+=( -e "$kv" )
    done
  fi

  run_args+=( --workdir /work )

  docker run "${run_args[@]}" "$IMAGE" "$@"
}
|
||||
|
||||
# Script entry point: hand every command-line argument to main unchanged.
main "$@"
|
||||
Loading…
Add table
Reference in a new issue