#!/usr/bin/env bash
# rl-train.sh — manage the RL self-play training run on apricot.
#
# Subcommands:
#   status     Show PID, elapsed, fps, last train block, eval dir state
#   logs [N]   Tail N (default 60) lines of the active training log
#   gpu        Show nvidia-smi snapshot
#   procs      Count godot/python procs related to training
#   launch     Start training as a transient systemd --user unit (env-driven; see Env vars below)
#   kill       Stop training (stop rl-train-* units, then SIGTERM and, after 5s, SIGKILL stragglers)
#   sync       scp local tooling/rl_self_play/*.py to the apricot worktree
#
# Env vars (with defaults):
#   RL_HOST=apricot.lan       SSH alias for the training box (LAN; VPN ProxyJump can fail)
#   RL_WORKTREE=/var/home/lilith/.cache/mc-rl-train-1779015795
#   RL_RUN_NAME=duel-v1b-cuda1
#   RL_DEVICE=cuda:1
#   RL_ENVS=4
#   RL_TOTAL_STEPS=200000
#   RL_EVAL_FREQ=20000
#   RL_EVAL_EPS=10
#   RL_MAX_TURNS=100
#   RL_PIDFILE=/tmp/rl-train.pid  (on the remote; launch writes the systemd unit name here)

set -euo pipefail  # strict mode: abort on errors, unset variables, failed pipeline stages

# Tunables. Each one keeps a non-empty value already present in the
# environment and otherwise falls back to the default shown here.
: "${RL_HOST:=apricot.lan}"
: "${RL_WORKTREE:=/var/home/lilith/.cache/mc-rl-train-1779015795}"
: "${RL_RUN_NAME:=duel-v1b-cuda1}"
: "${RL_DEVICE:=cuda:1}"
: "${RL_ENVS:=4}"
: "${RL_TOTAL_STEPS:=200000}"
: "${RL_EVAL_FREQ:=20000}"
: "${RL_EVAL_EPS:=10}"
: "${RL_MAX_TURNS:=100}"
: "${RL_PIDFILE:=/tmp/rl-train.pid}"

# Remote paths derived from the worktree and the run name.
LOG_REMOTE="${RL_WORKTREE}/training-${RL_RUN_NAME}.log"
EVAL_DIR_REMOTE="${RL_WORKTREE}/tooling/rl_self_play/runs/${RL_RUN_NAME}/eval"

# The first argument selects the subcommand (default: status); whatever is
# left in "$@" afterwards belongs to that subcommand.
cmd="${1:-status}"
shift || true  # shift fails when there were no arguments; that is fine

# remote SCRIPT
# Run SCRIPT (a single shell command string) on ${RL_HOST} over ssh and
# return ssh's exit status.
remote() {
  local remote_script="$1"
  ssh "${RL_HOST}" "${remote_script}"
}
# Subcommand dispatch. Every arm except `sync` and the usage fallback sends a
# script to the training host through remote(). Inside those double-quoted
# scripts, \$ and \" are expanded by the REMOTE shell, while ${RL_*},
# ${LOG_REMOTE}, ${EVAL_DIR_REMOTE} and ${n} are substituted locally before
# the script is sent.
case "$cmd" in
  status)
    # Find the trainer by command name (comm must start with "python") so the
    # ssh shell carrying this script text is not mistaken for it. Show its ps
    # line and the tail of whatever file its stdout points at, then list the
    # eval directory. `set +e` keeps the report going past failing probes.
    remote "
      set +e
      echo '---PYTHON PID---'
      py=\$(ps -eo pid,comm,args | awk '\$2 ~ /^python/ && /rl_self_play.train/ {print \$1; exit}')
      if [ -z \"\$py\" ]; then
        echo 'no training process'
      else
        ps -p \$py -o pid,etime,pcpu,pmem,cmd
        log=\$(readlink /proc/\$py/fd/1 2>/dev/null)
        echo \"---LOG (\$log)---\"
        tail -n 40 \"\$log\" 2>/dev/null || echo 'log not readable'
      fi
      echo '---EVAL DIR (${EVAL_DIR_REMOTE})---'
      ls -la ${EVAL_DIR_REMOTE} 2>/dev/null || echo 'missing'
    "
    ;;

  logs)
    # N is spliced into the remote command line, so insist on a plain
    # non-negative integer before sending anything.
    n="${1:-60}"
    if [[ ! "${n}" =~ ^[0-9]+$ ]]; then
      echo "logs: N must be a non-negative integer (got '${n}')" >&2
      exit 2
    fi
    # Tail the file behind the trainer's stdout; exits 1 when no trainer runs.
    remote "
      py=\$(ps -eo pid,comm,args | awk '\$2 ~ /^python/ && /rl_self_play.train/ {print \$1; exit}')
      if [ -z \"\$py\" ]; then echo 'no training process'; exit 1; fi
      tail -n ${n} \"\$(readlink /proc/\$py/fd/1)\"
    "
    ;;

  gpu)
    remote 'nvidia-smi --query-gpu=index,name,memory.used,memory.total,utilization.gpu --format=csv'
    ;;

  procs)
    # pgrep -f matches full command lines, and the remote shell's own command
    # line contains this whole script (patterns included). Its PID (\$\$) is
    # filtered out so the counts cover only real trainer/godot processes.
    remote "
      printf 'python train procs: '; pgrep -f 'python3 -m tooling.rl_self_play.train' | grep -cvx \"\$\$\"
      printf 'godot-bin procs: '; pgrep -f 'godot-bin' | grep -cvx \"\$\$\"
      uptime
    "
    ;;

  launch)
    # Launch as a transient systemd --user .service under heavy-tests.slice.
    # The slice (CPUWeight=20, MemoryMax=32G, TasksMax=4096) prevents the godot
    # workers spawned by the python parent from starving sshd/interactive work.
    # Every child process (flatpak, bwrap, godot-bin) inherits the cgroup, so a
    # 3000-proc explosion stays contained — exactly the wedge mode seen on
    # 2026-05-18 and 2026-05-19.
    #
    # Unit name includes epoch so re-launches with the same RL_RUN_NAME don't
    # collide with a stopped-but-not-yet-collected unit.
    #
    # The remote script refuses to start while any rl-train-* service is
    # running, appends stdout/stderr of the trainer to ${LOG_REMOTE}, records
    # the unit name in ${RL_PIDFILE}, and after 3s prints the unit status and
    # MainPID. Under `set -e` a unit that already died makes `systemctl status`
    # fail, which aborts the script with a non-zero exit.
    RL_UNIT="rl-train-${RL_RUN_NAME}-$(date +%s)"
    remote "
      set -e
      cd ${RL_WORKTREE}
      existing=\$(systemctl --user list-units --type=service --no-legend --state=running 'rl-train-*' 2>/dev/null | awk '{print \$1}' | head -1)
      if [ -n \"\$existing\" ]; then
        echo \"training already running: \$existing — run 'kill' first\"
        exit 1
      fi
      systemd-run --user \\
        --slice=heavy-tests.slice \\
        --unit=${RL_UNIT} \\
        --collect --quiet \\
        --working-directory=${RL_WORKTREE} \\
        --setenv=PYTHONUNBUFFERED=1 \\
        --property=StandardOutput=append:${LOG_REMOTE} \\
        --property=StandardError=append:${LOG_REMOTE} \\
        -- python3 -m tooling.rl_self_play.train \\
          --device ${RL_DEVICE} --num-envs ${RL_ENVS} \\
          --total-steps ${RL_TOTAL_STEPS} --eval-freq ${RL_EVAL_FREQ} \\
          --eval-episodes ${RL_EVAL_EPS} --max-turns ${RL_MAX_TURNS} \\
          --run-name ${RL_RUN_NAME}
      echo ${RL_UNIT} > ${RL_PIDFILE}
      sleep 3
      systemctl --user status ${RL_UNIT} --no-pager --lines=0
      systemctl --user show ${RL_UNIT} --property=MainPID
    "
    ;;

  kill)
    # Stop all rl-train-* transient services. systemd cascades SIGTERM through
    # the cgroup, then SIGKILL after TimeoutStopSec, reaping all godot children.
    # Falls back to pkill for any procs not in a unit (legacy runs / orphans):
    # SIGTERM first, then SIGKILL after 5s, then a count of what is left.
    #
    # Two details matter in the remote script below:
    #  * `--plain` drops the bullet systemctl prints in front of failed units,
    #    so awk's first field is always the unit name.
    #  * pkill/pgrep -f match full command lines, and the remote shell's own
    #    command line contains this script text. The patterns therefore use
    #    bracket expressions ([.] and [n]) so they match the real processes
    #    but not their own spelling — otherwise the first pkill would
    #    terminate this script before the SIGKILL sweep. Keep every other
    #    literal spelling of those process names out of the quoted script.
    remote "
      units=\$(systemctl --user list-units --type=service --plain --no-legend 'rl-train-*' 2>/dev/null | awk '{print \$1}')
      if [ -n \"\$units\" ]; then
        echo \"stopping units:\"
        echo \"\$units\"
        echo \"\$units\" | xargs -r systemctl --user stop
      fi
      pkill -f 'rl_self_play[.]train' 2>/dev/null || true
      sleep 5
      pkill -9 -f 'rl_self_play[.]train' 2>/dev/null || true
      pkill -9 -f 'godot-bi[n] --path' 2>/dev/null || true
      printf 'remaining: '
      pgrep -f 'rl_self_play[.]train|godot-bi[n]' | wc -l
    "
    ;;

  sync)
    # Copy the local *.py sources (located relative to the parent directory of
    # this script's directory) into the remote worktree.
    here="$(cd "$(dirname "$0")/.." && pwd)"
    scp -q "${here}/tooling/rl_self_play/"*.py \
      "${RL_HOST}:${RL_WORKTREE}/tooling/rl_self_play/"
    echo "synced tooling/rl_self_play/*.py to ${RL_HOST}:${RL_WORKTREE}"
    ;;

  *)
    # Unknown subcommand: print the header comment block as usage — every
    # line after the shebang up to the first non-comment line — and exit 2.
    awk 'NR == 1 { next } /^#/ { print; next } { exit }' "$0"
    exit 2
    ;;
esac