magicciv/scripts/rl-train.sh

130 lines
4 KiB
Bash
Raw Normal View History

#!/usr/bin/env bash
# rl-train.sh — manage the RL self-play training run on apricot.
#
# Subcommands:
# status Show PID, elapsed, fps, last train block, eval dir state
# logs [N] Tail N (default 60) lines of the active training log
# gpu Show nvidia-smi snapshot
# procs Count godot/python procs related to training
# launch Start training (env-driven; see ENV below)
# kill Stop training cleanly (SIGTERM, then SIGKILL after 5s)
# sync scp local tooling/rl_self_play/*.py to the RL_HOST worktree
#
# Env vars (with defaults):
# RL_HOST=apricot SSH alias for the training box
# RL_WORKTREE=/var/home/lilith/.cache/mc-rl-train-1779015795
# RL_RUN_NAME=duel-v1b-cuda1
# RL_DEVICE=cuda:1
# RL_ENVS=4
# RL_TOTAL_STEPS=200000
# RL_EVAL_FREQ=20000
# RL_EVAL_EPS=10
# RL_MAX_TURNS=100
# RL_PIDFILE=/tmp/rl-train.pid (on the remote)
# Strict mode: stop on a failing command, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Settings. A non-empty value already present in the environment wins;
# otherwise the default on the right is used.
RL_HOST="${RL_HOST:-apricot}"
RL_WORKTREE="${RL_WORKTREE:-/var/home/lilith/.cache/mc-rl-train-1779015795}"
RL_RUN_NAME="${RL_RUN_NAME:-duel-v1b-cuda1}"
RL_DEVICE="${RL_DEVICE:-cuda:1}"
RL_ENVS="${RL_ENVS:-4}"
RL_TOTAL_STEPS="${RL_TOTAL_STEPS:-200000}"
RL_EVAL_FREQ="${RL_EVAL_FREQ:-20000}"
RL_EVAL_EPS="${RL_EVAL_EPS:-10}"
RL_MAX_TURNS="${RL_MAX_TURNS:-100}"
RL_PIDFILE="${RL_PIDFILE:-/tmp/rl-train.pid}"

# Paths on the remote host, derived from the worktree and run name.
LOG_REMOTE="$RL_WORKTREE/training-$RL_RUN_NAME.log"
EVAL_DIR_REMOTE="$RL_WORKTREE/tooling/rl_self_play/runs/$RL_RUN_NAME/eval"

# First positional argument selects the subcommand; a missing or empty one
# means "status". The argument is consumed so "$@" holds only the
# subcommand's own arguments afterwards.
cmd="status"
if (( $# > 0 )); then
  if [[ -n "$1" ]]; then
    cmd="$1"
  fi
  shift
fi
#######################################
# Run a shell snippet on the training host.
# Globals:   RL_HOST (read)
# Arguments: $1 - script text to execute remotely
# Outputs:   whatever the remote snippet writes to stdout/stderr
# Returns:   ssh's exit status
#
# The snippet is fed to `bash -s` on stdin instead of being passed as an ssh
# argument. As an argument it becomes part of the remote shell's own command
# line, so any `pgrep -f` / `pkill -f` pattern quoted inside the snippet also
# matches (and, for pkill, signals) the very shell that is running it.
#######################################
remote() {
  local script=$1
  ssh "${RL_HOST}" 'bash -s' <<<"${script}"
}
# Subcommand dispatch.
#
# Each remote snippet is a double-quoted string: unescaped ${...} expands
# locally before the text is sent, while \$ and \" are left for the remote
# shell. Operator-supplied values are pre-quoted with printf %q so each one
# reaches the remote shell as a single literal word.
#
# NOTE(review): the pgrep -f / pkill -f patterns below match against full
# command lines. If remote() delivers the snippet as the remote shell's
# argument, that shell's own command line contains these patterns — confirm
# remote() does not expose the snippet text that way.
case "$cmd" in
  status)
    # Find the first python process whose args mention rl_self_play.train,
    # show its ps line, the tail of whatever file its stdout points at, and
    # the eval directory listing.
    printf -v eval_dir_q '%q' "${EVAL_DIR_REMOTE}"
    remote "
      set +e
      eval_dir=${eval_dir_q}
      echo '---PYTHON PID---'
      py=\$(ps -eo pid,comm,args | awk '\$2 ~ /^python/ && /rl_self_play.train/ {print \$1; exit}')
      if [ -z \"\$py\" ]; then
        echo 'no training process'
        echo \"---EVAL DIR (\$eval_dir)---\"
        ls -la \"\$eval_dir\" 2>/dev/null || echo 'missing'
        exit 0
      fi
      ps -p \$py -o pid,etime,pcpu,pmem,cmd
      log=\$(readlink /proc/\$py/fd/1 2>/dev/null)
      echo \"---LOG (\$log)---\"
      tail -40 \"\$log\" 2>/dev/null || echo 'log not readable'
      echo \"---EVAL DIR (\$eval_dir)---\"
      ls -la \"\$eval_dir\" 2>/dev/null || echo 'missing'
    "
    ;;
  logs)
    n="${1:-60}"
    # N is spliced into the remote command line, so accept digits only.
    if [[ ! "$n" =~ ^[0-9]+$ ]]; then
      printf 'logs: N must be a non-negative integer, got: %s\n' "$n" >&2
      exit 2
    fi
    remote "
      py=\$(ps -eo pid,comm,args | awk '\$2 ~ /^python/ && /rl_self_play.train/ {print \$1; exit}')
      if [ -z \"\$py\" ]; then echo 'no training process'; exit 1; fi
      tail -n ${n} \"\$(readlink /proc/\$py/fd/1)\"
    "
    ;;
  gpu)
    remote 'nvidia-smi --query-gpu=index,name,memory.used,memory.total,utilization.gpu --format=csv'
    ;;
  procs)
    remote "
      printf 'python train procs: '; pgrep -af 'python3 -m tooling.rl_self_play.train' | wc -l
      printf 'godot-bin procs: '; pgrep -af 'godot-bin' | wc -l
      uptime
    "
    ;;
  launch)
    # Build the trainer's argument list with every value shell-quoted.
    printf -v train_args '%q ' \
      --device "${RL_DEVICE}" --num-envs "${RL_ENVS}" \
      --total-steps "${RL_TOTAL_STEPS}" --eval-freq "${RL_EVAL_FREQ}" \
      --eval-episodes "${RL_EVAL_EPS}" --max-turns "${RL_MAX_TURNS}" \
      --run-name "${RL_RUN_NAME}"
    printf -v worktree_q '%q' "${RL_WORKTREE}"
    printf -v log_q '%q' "${LOG_REMOTE}"
    printf -v pidfile_q '%q' "${RL_PIDFILE}"
    # Refuse to start a second run; otherwise start the trainer detached,
    # record its PID, and after 3s confirm it is still alive. A trainer that
    # died in that window makes the subcommand exit non-zero.
    remote "
      cd ${worktree_q} || exit 1
      if pgrep -f 'python3 -m tooling.rl_self_play.train' >/dev/null; then
        echo 'training already running; run kill first'
        pgrep -af 'python3 -m tooling.rl_self_play.train'
        exit 1
      fi
      nohup python3 -m tooling.rl_self_play.train ${train_args} \
        > ${log_q} 2>&1 &
      echo \$! > ${pidfile_q}
      sleep 3
      if ! pgrep -af 'python3 -m tooling.rl_self_play.train'; then
        echo 'launch failed; check log'
        tail -20 ${log_q}
        exit 1
      fi
    "
    ;;
  kill)
    printf -v pidfile_q '%q' "${RL_PIDFILE}"
    # SIGTERM first, SIGKILL for anything left after 5s (including godot-bin
    # processes), then drop the pidfile written by launch so it cannot point
    # at a dead or reused PID.
    remote "
      pkill -f 'rl_self_play.train' 2>/dev/null || true
      sleep 5
      pkill -9 -f 'rl_self_play.train' 2>/dev/null || true
      pkill -9 -f 'godot-bin --path' 2>/dev/null || true
      rm -f -- ${pidfile_q}
      printf 'remaining: '
      pgrep -f 'rl_self_play.train|godot-bin' | wc -l
    "
    ;;
  sync)
    # The project root is the parent of the directory holding this script.
    here="$(cd "$(dirname "$0")/.." && pwd)"
    scp -q "${here}/tooling/rl_self_play/"*.py \
      "${RL_HOST}:${RL_WORKTREE}/tooling/rl_self_play/"
    echo "synced tooling/rl_self_play/*.py to ${RL_HOST}:${RL_WORKTREE}"
    ;;
  *)
    # Unknown subcommand: print this file's header comment block (every line
    # after the shebang up to the first line that is not a comment) as usage,
    # independent of how many lines the header happens to have.
    awk 'NR == 1 { next } !/^#/ { exit } { print }' "$0"
    exit 2
    ;;
esac