#!/usr/bin/env bash
# rl-train.sh — manage the RL self-play training run on a remote host.
#
# Usage: rl-train.sh [SUBCOMMAND] [ARGS]   (default subcommand: status)
#
# Subcommands:
#   status    Show the trainer's ps line, the last 40 lines of its log, and
#             a listing of the eval directory
#   logs [N]  Tail N (default 60) lines of the active training log
#   gpu       Show an nvidia-smi snapshot
#   procs     Count python trainer / godot-bin processes, then show uptime
#   launch    Start training (configured by the env vars below)
#   kill      Stop training (SIGTERM, then SIGKILL after up to 5s)
#   sync      scp local tooling/rl_self_play/*.py to the remote worktree
#
# Env vars (with defaults):
#   RL_HOST=apricot                SSH alias for the training box
#   RL_WORKTREE=/var/home/lilith/.cache/mc-rl-train-1779015795
#   RL_RUN_NAME=duel-v1b-cuda1
#   RL_DEVICE=cuda:1
#   RL_ENVS=4
#   RL_TOTAL_STEPS=200000
#   RL_EVAL_FREQ=20000
#   RL_EVAL_EPS=10
#   RL_MAX_TURNS=100
#   RL_PIDFILE=/tmp/rl-train.pid   (on the remote)
#
# Requires bash on the remote host: remote snippets are run with `bash -s`.

set -euo pipefail

: "${RL_HOST:=apricot}"
: "${RL_WORKTREE:=/var/home/lilith/.cache/mc-rl-train-1779015795}"
: "${RL_RUN_NAME:=duel-v1b-cuda1}"
: "${RL_DEVICE:=cuda:1}"
: "${RL_ENVS:=4}"
: "${RL_TOTAL_STEPS:=200000}"
: "${RL_EVAL_FREQ:=20000}"
: "${RL_EVAL_EPS:=10}"
: "${RL_MAX_TURNS:=100}"
: "${RL_PIDFILE:=/tmp/rl-train.pid}"

readonly LOG_REMOTE="${RL_WORKTREE}/training-${RL_RUN_NAME}.log"
readonly EVAL_DIR_REMOTE="${RL_WORKTREE}/tooling/rl_self_play/runs/${RL_RUN_NAME}/eval"

# Print a message to stderr and exit with status 2 (usage error).
die_usage() {
  printf '%s\n' "$*" >&2
  exit 2
}

# Print the leading comment block of this file (everything after the shebang
# up to the first non-comment line). Independent of how long the header is.
usage() {
  awk 'NR == 1 { next } /^#/ { print; next } { exit }' "$0"
}

# Emit shell assignments that make the local configuration available to a
# remote snippet. Values are %q-quoted so spaces or shell metacharacters in
# them cannot alter the remote script.
remote_prelude() {
  printf '%s=%q\n' \
    RL_WORKTREE "${RL_WORKTREE}" \
    RL_RUN_NAME "${RL_RUN_NAME}" \
    RL_DEVICE "${RL_DEVICE}" \
    RL_ENVS "${RL_ENVS}" \
    RL_TOTAL_STEPS "${RL_TOTAL_STEPS}" \
    RL_EVAL_FREQ "${RL_EVAL_FREQ}" \
    RL_EVAL_EPS "${RL_EVAL_EPS}" \
    RL_MAX_TURNS "${RL_MAX_TURNS}" \
    RL_PIDFILE "${RL_PIDFILE}" \
    LOG_REMOTE "${LOG_REMOTE}" \
    EVAL_DIR_REMOTE "${EVAL_DIR_REMOTE}"
}

# Run a shell snippet on RL_HOST.
# Stdin:     the snippet (normally a quoted here-doc, so nothing in it is
#            expanded locally).
# Arguments: optional extra NAME=VALUE pairs to define remotely.
# Returns:   the exit status of ssh (i.e. of the remote snippet).
#
# The snippet is fed to `bash -s` on stdin instead of being passed as the ssh
# command argument. Passing it as an argument puts the whole script text in
# the remote shell's command line, where `pgrep -f` / `pkill -f` match it:
# the shell would be counted as a trainer, or killed by its own pkill.
remote() {
  local script assignment
  script="$(
    remote_prelude
    for assignment in "$@"; do
      printf '%s=%q\n' "${assignment%%=*}" "${assignment#*=}"
    done
    cat
  )"
  ssh "${RL_HOST}" 'bash -s' <<<"${script}"
}

# status: trainer process info, tail of its log, and eval dir listing.
cmd_status() {
  remote <<'EOF'
set +e
show_eval_dir() {
  printf -- '---EVAL DIR (%s)---\n' "${EVAL_DIR_REMOTE}"
  ls -la "${EVAL_DIR_REMOTE}" 2>/dev/null || echo 'missing'
}
echo '---PYTHON PID---'
# Match on the comm column so only python processes qualify.
py=$(ps -eo pid,comm,args | awk '$2 ~ /^python/ && /rl_self_play.train/ {print $1; exit}')
if [ -z "$py" ]; then
  echo 'no training process'
  show_eval_dir
  exit 0
fi
ps -p "$py" -o pid,etime,pcpu,pmem,cmd
# The active log is whatever the trainer's stdout points at.
log=$(readlink "/proc/$py/fd/1" 2>/dev/null)
echo "---LOG ($log)---"
tail -n 40 "$log" 2>/dev/null || echo 'log not readable'
show_eval_dir
EOF
}

# logs [N]: tail the last N (default 60) lines of the trainer's stdout file.
cmd_logs() {
  local n="${1:-60}"
  [[ "${n}" =~ ^[0-9]+$ ]] || die_usage "logs: N must be a non-negative integer, got '${n}'"
  remote "LOG_LINES=${n}" <<'EOF'
py=$(ps -eo pid,comm,args | awk '$2 ~ /^python/ && /rl_self_play.train/ {print $1; exit}')
if [ -z "$py" ]; then echo 'no training process'; exit 1; fi
tail -n "${LOG_LINES}" "$(readlink "/proc/$py/fd/1")"
EOF
}

# gpu: one-shot nvidia-smi summary in CSV form.
cmd_gpu() {
  remote <<'EOF'
nvidia-smi --query-gpu=index,name,memory.used,memory.total,utilization.gpu --format=csv
EOF
}

# procs: count trainer and godot-bin processes, then show uptime.
cmd_procs() {
  remote <<'EOF'
printf 'python train procs: '; pgrep -af 'python3 -m tooling.rl_self_play.train' | wc -l
printf 'godot-bin procs: '; pgrep -af 'godot-bin' | wc -l
uptime
EOF
}

# launch: start the trainer in the background under nohup, record its PID in
# RL_PIDFILE, and verify after 3s that it is still alive. Exits non-zero if a
# trainer is already running or the new one did not survive.
cmd_launch() {
  remote <<'EOF'
cd "${RL_WORKTREE}" || exit 1
if pgrep -f 'python3 -m tooling.rl_self_play.train' >/dev/null; then
  echo 'training already running; run kill first'
  pgrep -af 'python3 -m tooling.rl_self_play.train'
  exit 1
fi
nohup python3 -m tooling.rl_self_play.train \
  --device "${RL_DEVICE}" --num-envs "${RL_ENVS}" \
  --total-steps "${RL_TOTAL_STEPS}" --eval-freq "${RL_EVAL_FREQ}" \
  --eval-episodes "${RL_EVAL_EPS}" --max-turns "${RL_MAX_TURNS}" \
  --run-name "${RL_RUN_NAME}" > "${LOG_REMOTE}" 2>&1 &
echo $! > "${RL_PIDFILE}"
sleep 3
if ! pgrep -af 'python3 -m tooling.rl_self_play.train'; then
  echo 'launch failed; check log'
  tail -n 20 "${LOG_REMOTE}"
  exit 1
fi
EOF
}

# kill: SIGTERM the trainer, wait up to 5s for it and its godot workers to
# exit, SIGKILL whatever is left, remove the pidfile, and report the number of
# matching processes still present.
cmd_kill() {
  remote <<'EOF'
pkill -f 'rl_self_play.train' 2>/dev/null || true
# Grace period: stop waiting as soon as everything has exited.
i=0
while [ "$i" -lt 5 ] && pgrep -f 'rl_self_play.train|godot-bin --path' >/dev/null; do
  sleep 1
  i=$((i + 1))
done
pkill -9 -f 'rl_self_play.train' 2>/dev/null || true
pkill -9 -f 'godot-bin --path' 2>/dev/null || true
# The pidfile written by launch is stale once the trainer is gone.
rm -f -- "${RL_PIDFILE}"
printf 'remaining: '
pgrep -f 'rl_self_play.train|godot-bin' | wc -l
EOF
}

# sync: copy the local tooling/rl_self_play/*.py files (resolved relative to
# this script's parent directory) into the remote worktree.
cmd_sync() {
  local here
  here="$(cd "$(dirname -- "$0")/.." && pwd)"
  scp -q "${here}/tooling/rl_self_play/"*.py \
    "${RL_HOST}:${RL_WORKTREE}/tooling/rl_self_play/"
  echo "synced tooling/rl_self_play/*.py to ${RL_HOST}:${RL_WORKTREE}"
}

# Dispatch on the first argument (default: status). Unknown subcommands print
# the usage header and exit 2.
main() {
  local cmd="${1:-status}"
  shift || true
  case "${cmd}" in
    status) cmd_status ;;
    logs) cmd_logs "$@" ;;
    gpu) cmd_gpu ;;
    procs) cmd_procs ;;
    launch) cmd_launch ;;
    kill) cmd_kill ;;
    sync) cmd_sync ;;
    *)
      usage
      exit 2
      ;;
  esac
}

main "$@"