#!/usr/bin/env bash
# rl-train.sh — manage the RL self-play training run on apricot.
#
# Subcommands:
#   status      Show PID, elapsed, fps, last train block, eval dir state
#   logs [N]    Tail N (default 60) lines of the active training log
#   gpu         Show nvidia-smi snapshot
#   procs       Count godot/python procs related to training
#   launch      Start training (env-driven; see ENV below)
#   kill        Stop training cleanly (SIGTERM, then SIGKILL after 5s)
#   sync        scp local rl_self_play/ to the apricot worktree
#
# Env vars (with defaults):
#   RL_HOST=apricot.lan        SSH alias for the training box (LAN; VPN ProxyJump can fail)
#   RL_WORKTREE=/var/home/lilith/.cache/mc-rl-train-1779015795
#   RL_RUN_NAME=duel-v1b-cuda1
#   RL_DEVICE=cuda:1
#   RL_ENVS=4
#   RL_TOTAL_STEPS=200000
#   RL_EVAL_FREQ=20000
#   RL_EVAL_EPS=10
#   RL_MAX_TURNS=100
#   RL_PIDFILE=/tmp/rl-train.pid  (on the remote; holds the systemd unit name)

set -euo pipefail

: "${RL_HOST:=apricot.lan}"
: "${RL_WORKTREE:=/var/home/lilith/.cache/mc-rl-train-1779015795}"
: "${RL_RUN_NAME:=duel-v1b-cuda1}"
: "${RL_DEVICE:=cuda:1}"
: "${RL_ENVS:=4}"
: "${RL_TOTAL_STEPS:=200000}"
: "${RL_EVAL_FREQ:=20000}"
: "${RL_EVAL_EPS:=10}"
: "${RL_MAX_TURNS:=100}"
: "${RL_PIDFILE:=/tmp/rl-train.pid}"

LOG_REMOTE="${RL_WORKTREE}/training-${RL_RUN_NAME}.log"
EVAL_DIR_REMOTE="${RL_WORKTREE}/tooling/rl_self_play/runs/${RL_RUN_NAME}/eval"

cmd="${1:-status}"
shift || true

#######################################
# Run a script on the training host.
# The script is fed to `bash -s` on stdin rather than passed as the ssh
# command argument. If it were an argument, the remote shell's own command
# line would contain every pattern used by `pgrep -f` / `pkill -f` below, so
# `kill` would SIGTERM its own shell and `procs` would over-count.
# NOTE: because the script arrives on stdin, remote commands must not read
# stdin themselves (none of the scripts in this file do).
# Globals:   RL_HOST (read)
# Arguments: $1 - shell script text to execute remotely
# Returns:   exit status of ssh / the remote script
#######################################
remote() {
  ssh "${RL_HOST}" 'bash -s' <<<"$1"
}

#######################################
# Print the header comment block of this file (line 2 up to the first
# non-comment line) as usage text, independent of the header's length.
#######################################
usage() {
  awk 'NR == 1 { next } !/^#/ { exit } { print }' "$0"
}

case "$cmd" in
  status)
    remote "
      set +e
      echo '---PYTHON PID---'
      py=\$(ps -eo pid,comm,args | awk '\$2 ~ /^python/ && /rl_self_play.train/ {print \$1; exit}')
      if [ -z \"\$py\" ]; then
        echo 'no training process'
        echo '---EVAL DIR (${EVAL_DIR_REMOTE})---'
        ls -la \"${EVAL_DIR_REMOTE}\" 2>/dev/null || echo 'missing'
        exit 0
      fi
      ps -p \$py -o pid,etime,pcpu,pmem,cmd
      # The active log is whatever file the trainer's stdout points at.
      log=\$(readlink /proc/\$py/fd/1 2>/dev/null)
      echo \"---LOG (\$log)---\"
      tail -n 40 \"\$log\" 2>/dev/null || echo 'log not readable'
      echo '---EVAL DIR (${EVAL_DIR_REMOTE})---'
      ls -la \"${EVAL_DIR_REMOTE}\" 2>/dev/null || echo 'missing'
    "
    ;;

  logs)
    n="${1:-60}"
    # N is interpolated into the remote script, so only accept plain digits.
    if ! [[ "$n" =~ ^[0-9]+$ ]]; then
      printf 'logs: N must be a non-negative integer, got: %s\n' "$n" >&2
      exit 2
    fi
    remote "
      py=\$(ps -eo pid,comm,args | awk '\$2 ~ /^python/ && /rl_self_play.train/ {print \$1; exit}')
      if [ -z \"\$py\" ]; then echo 'no training process'; exit 1; fi
      tail -n ${n} \"\$(readlink /proc/\$py/fd/1)\"
    "
    ;;

  gpu)
    remote 'nvidia-smi --query-gpu=index,name,memory.used,memory.total,utilization.gpu --format=csv'
    ;;

  procs)
    remote "
      printf 'python train procs: '; pgrep -af 'python3 -m tooling.rl_self_play.train' | wc -l
      printf 'godot-bin procs: '; pgrep -af 'godot-bin' | wc -l
      uptime
    "
    ;;

  launch)
    # Launch as a transient systemd --user .service under heavy-tests.slice.
    # The slice (CPUWeight=20, MemoryMax=32G, TasksMax=4096) prevents the godot
    # workers spawned by the python parent from starving sshd/interactive work.
    # Every child process (flatpak, bwrap, godot-bin) inherits the cgroup, so a
    # 3000-proc explosion stays contained — exactly the wedge mode seen on
    # 2026-05-18 and 2026-05-19.
    #
    # Unit name includes epoch so re-launches with the same RL_RUN_NAME don't
    # collide with a stopped-but-not-yet-collected unit.
    RL_UNIT="rl-train-${RL_RUN_NAME}-$(date +%s)"
    remote "
      set -e
      cd \"${RL_WORKTREE}\"
      # --plain drops the status bullet systemctl puts before some units, so
      # the first awk field is always the unit name.
      existing=\$(systemctl --user list-units --type=service --plain --no-legend --state=running 'rl-train-*' 2>/dev/null | awk '{print \$1}' | head -1)
      if [ -n \"\$existing\" ]; then
        echo \"training already running: \$existing — run 'kill' first\"
        exit 1
      fi
      systemd-run --user \\
        --slice=heavy-tests.slice \\
        --unit=${RL_UNIT} \\
        --collect --quiet \\
        --working-directory=\"${RL_WORKTREE}\" \\
        --setenv=PYTHONUNBUFFERED=1 \\
        --property=StandardOutput=append:\"${LOG_REMOTE}\" \\
        --property=StandardError=append:\"${LOG_REMOTE}\" \\
        -- python3 -m tooling.rl_self_play.train \\
          --device ${RL_DEVICE} --num-envs ${RL_ENVS} \\
          --total-steps ${RL_TOTAL_STEPS} --eval-freq ${RL_EVAL_FREQ} \\
          --eval-episodes ${RL_EVAL_EPS} --max-turns ${RL_MAX_TURNS} \\
          --run-name ${RL_RUN_NAME}
      # Record the unit name (not a numeric PID) in the pidfile.
      echo ${RL_UNIT} > \"${RL_PIDFILE}\"
      sleep 3
      systemctl --user status ${RL_UNIT} --no-pager --lines=0
      systemctl --user show ${RL_UNIT} --property=MainPID
    "
    ;;

  kill)
    # Stop all rl-train-* transient services. systemd cascades SIGTERM through
    # the cgroup, then SIGKILL after TimeoutStopSec, reaping all godot children.
    # Falls back to pkill for any procs not in a unit (legacy runs / orphans).
    remote "
      # --plain keeps failed units from showing up as a bullet in column 1.
      units=\$(systemctl --user list-units --type=service --plain --no-legend 'rl-train-*' 2>/dev/null | awk '{print \$1}')
      if [ -n \"\$units\" ]; then
        echo \"stopping units:\"
        echo \"\$units\"
        echo \"\$units\" | xargs -r systemctl --user stop
      fi
      # Legacy / out-of-unit sweep: SIGTERM first, SIGKILL after a 5s grace.
      # pkill exits non-zero when nothing matched; that is not an error here.
      pkill -f 'rl_self_play.train' 2>/dev/null || true
      sleep 5
      pkill -9 -f 'rl_self_play.train' 2>/dev/null || true
      pkill -9 -f 'godot-bin --path' 2>/dev/null || true
      printf 'remaining: '
      pgrep -f 'rl_self_play.train|godot-bin' | wc -l
    "
    ;;

  sync)
    # Resolve the directory one level above this script's own directory.
    here="$(cd "$(dirname "$0")/.." && pwd)"
    scp -q "${here}/tooling/rl_self_play/"*.py \
      "${RL_HOST}:${RL_WORKTREE}/tooling/rl_self_play/"
    echo "synced tooling/rl_self_play/*.py to ${RL_HOST}:${RL_WORKTREE}"
    ;;

  *)
    # Unknown subcommand: show the header as usage and signal misuse.
    usage
    exit 2
    ;;
esac