From a6f909a1513c9383c0c477520908e01fff3fd6af Mon Sep 17 00:00:00 2001
From: Natalie
Date: Sun, 17 May 2026 05:22:23 -0700
Subject: [PATCH] =?UTF-8?q?feat(@projects/@magic-civilization):=20?=
 =?UTF-8?q?=E2=9C=A8=20add=20rl-train=20management=20script?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-Authored-By: Lilith Autocommit
---
 scripts/rl-train.sh | 194 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 194 insertions(+)
 create mode 100755 scripts/rl-train.sh

diff --git a/scripts/rl-train.sh b/scripts/rl-train.sh
new file mode 100755
--- /dev/null
+++ b/scripts/rl-train.sh
@@ -0,0 +1,194 @@
+#!/usr/bin/env bash
+# rl-train.sh — manage the RL self-play training run on apricot.
+#
+# Subcommands:
+#   status     Show PID, elapsed, fps, last train block, eval dir state
+#   logs [N]   Tail N (default 60) lines of the active training log
+#   gpu        Show nvidia-smi snapshot
+#   procs      Count godot/python procs related to training
+#   launch     Start training (env-driven; see ENV below)
+#   kill       Stop training cleanly (SIGTERM, then SIGKILL after 5s)
+#   sync       scp local tooling/rl_self_play/*.py to the apricot worktree
+#   help       Print this header
+#
+# Env vars (with defaults):
+#   RL_HOST=apricot                SSH alias for the training box
+#   RL_WORKTREE=/var/home/lilith/.cache/mc-rl-train-1779015795
+#   RL_RUN_NAME=duel-v1b-cuda1
+#   RL_DEVICE=cuda:1
+#   RL_ENVS=4
+#   RL_TOTAL_STEPS=200000
+#   RL_EVAL_FREQ=20000
+#   RL_EVAL_EPS=10
+#   RL_MAX_TURNS=100
+#   RL_PIDFILE=/tmp/rl-train.pid   (on the remote)
+
+# Remote snippets are single-quoted on purpose: their variables expand on
+# RL_HOST, not locally.
+# shellcheck disable=SC2016
+
+set -euo pipefail
+
+: "${RL_HOST:=apricot}"
+: "${RL_WORKTREE:=/var/home/lilith/.cache/mc-rl-train-1779015795}"
+: "${RL_RUN_NAME:=duel-v1b-cuda1}"
+: "${RL_DEVICE:=cuda:1}"
+: "${RL_ENVS:=4}"
+: "${RL_TOTAL_STEPS:=200000}"
+: "${RL_EVAL_FREQ:=20000}"
+: "${RL_EVAL_EPS:=10}"
+: "${RL_MAX_TURNS:=100}"
+: "${RL_PIDFILE:=/tmp/rl-train.pid}"
+
+LOG_REMOTE="${RL_WORKTREE}/training-${RL_RUN_NAME}.log"
+EVAL_DIR_REMOTE="${RL_WORKTREE}/tooling/rl_self_play/runs/${RL_RUN_NAME}/eval"
+# pgrep -f pattern that identifies the trainer started by `launch`.
+TRAIN_PATTERN='python3 -m tooling.rl_self_play.train'
+# Line count for `logs`; overridden by its optional argument.
+TAIL_LINES=60
+
+# Local settings copied into every remote snippet (see remote()).
+REMOTE_VARS=(
+  RL_WORKTREE RL_RUN_NAME RL_DEVICE RL_ENVS RL_TOTAL_STEPS RL_EVAL_FREQ
+  RL_EVAL_EPS RL_MAX_TURNS RL_PIDFILE LOG_REMOTE EVAL_DIR_REMOTE
+  TRAIN_PATTERN TAIL_LINES
+)
+
+# Print the header comment: every line after the shebang up to the first
+# line that is not a comment.
+usage() {
+  awk 'NR == 1 { next } /^#/ { print; next } { exit }' "$0"
+}
+
+# Run a shell snippet on RL_HOST.
+#   $1  snippet text; each REMOTE_VARS name is defined as a shell variable
+#       before the snippet runs, so the snippet can stay single-quoted.
+# Returns the exit status of ssh, i.e. of the remote snippet.
+#
+# The snippet is piped to `bash -s` rather than passed as the ssh command:
+# as a command it would sit in the remote shell's argv, where every
+# `pgrep -f` / `pkill -f` below would match the shell running the snippet.
+remote() {
+  local name assignment prelude=''
+  for name in "${REMOTE_VARS[@]}"; do
+    # %q keeps values with spaces or shell metacharacters intact.
+    printf -v assignment '%s=%q\n' "${name}" "${!name}"
+    prelude+="${assignment}"
+  done
+  ssh "${RL_HOST}" bash -s <<<"${prelude}$1"
+}
+
+cmd="${1:-status}"
+if (( $# > 0 )); then
+  shift
+fi
+
+case "${cmd}" in
+  status)
+    remote '
+      echo "---PYTHON PID---"
+      py=$(pgrep -f "${TRAIN_PATTERN}" | head -1)
+      if [ -z "$py" ]; then
+        echo "no training process"
+      else
+        ps -p "$py" -o pid,etime,pcpu,pmem,cmd
+        log=$(readlink "/proc/$py/fd/1" 2>/dev/null)
+        echo "---LOG ($log)---"
+        tail -n 40 "$log" 2>/dev/null || echo "log not readable"
+      fi
+      echo "---EVAL DIR (${EVAL_DIR_REMOTE})---"
+      ls -la "${EVAL_DIR_REMOTE}" 2>/dev/null || echo "missing"
+    '
+    ;;
+
+  logs)
+    TAIL_LINES="${1:-60}"
+    if [[ ! "${TAIL_LINES}" =~ ^[0-9]+$ ]]; then
+      printf 'logs: N must be a whole number, got: %s\n' "${TAIL_LINES}" >&2
+      exit 2
+    fi
+    remote '
+      py=$(pgrep -f "${TRAIN_PATTERN}" | head -1)
+      if [ -z "$py" ]; then echo "no training process"; exit 1; fi
+      tail -n "${TAIL_LINES}" "$(readlink "/proc/$py/fd/1")"
+    '
+    ;;
+
+  gpu)
+    remote 'nvidia-smi --query-gpu=index,name,memory.used,memory.total,utilization.gpu --format=csv'
+    ;;
+
+  procs)
+    remote '
+      printf "python train procs: "; pgrep -f "${TRAIN_PATTERN}" | wc -l
+      printf "godot-bin procs: "; pgrep -f "godot-bin" | wc -l
+      uptime
+    '
+    ;;
+
+  launch)
+    remote '
+      cd "${RL_WORKTREE}" || exit 1
+      if pgrep -f "${TRAIN_PATTERN}" >/dev/null; then
+        echo "training already running; run kill first"
+        pgrep -af "${TRAIN_PATTERN}"
+        exit 1
+      fi
+      nohup python3 -m tooling.rl_self_play.train \
+        --device "${RL_DEVICE}" --num-envs "${RL_ENVS}" \
+        --total-steps "${RL_TOTAL_STEPS}" --eval-freq "${RL_EVAL_FREQ}" \
+        --eval-episodes "${RL_EVAL_EPS}" --max-turns "${RL_MAX_TURNS}" \
+        --run-name "${RL_RUN_NAME}" > "${LOG_REMOTE}" 2>&1 < /dev/null &
+      echo "$!" > "${RL_PIDFILE}"
+      sleep 3
+      if pgrep -af "${TRAIN_PATTERN}"; then
+        exit 0
+      fi
+      # The trainer died during start-up: report it as a failure instead of
+      # returning the status of the diagnostic tail.
+      echo "launch failed; check log" >&2
+      tail -n 20 "${LOG_REMOTE}" >&2
+      rm -f "${RL_PIDFILE}"
+      exit 1
+    '
+    ;;
+
+  kill)
+    remote '
+      pkill -f "rl_self_play.train" 2>/dev/null
+      # Allow up to 5s for a clean exit on SIGTERM before escalating.
+      for _ in 1 2 3 4 5; do
+        pgrep -f "rl_self_play.train" >/dev/null || break
+        sleep 1
+      done
+      pkill -9 -f "rl_self_play.train" 2>/dev/null
+      pkill -9 -f "godot-bin --path" 2>/dev/null
+      rm -f "${RL_PIDFILE}"
+      printf "remaining: "
+      pgrep -f "rl_self_play.train|godot-bin" | wc -l
+    '
+    ;;
+
+  sync)
+    here="$(cd "$(dirname "$0")/.." && pwd)"
+    sources=("${here}/tooling/rl_self_play/"*.py)
+    # Without nullglob an unmatched glob stays literal; catch that here.
+    if [[ ! -e "${sources[0]}" ]]; then
+      printf 'sync: no .py files in %s/tooling/rl_self_play\n' "${here}" >&2
+      exit 1
+    fi
+    scp -q "${sources[@]}" \
+      "${RL_HOST}:${RL_WORKTREE}/tooling/rl_self_play/"
+    echo "synced tooling/rl_self_play/*.py to ${RL_HOST}:${RL_WORKTREE}"
+    ;;
+
+  help | -h | --help)
+    usage
+    ;;
+
+  *)
+    printf 'unknown subcommand: %s\n' "${cmd}" >&2
+    usage >&2
+    exit 2
+    ;;
+esac