From a6f909a1513c9383c0c477520908e01fff3fd6af Mon Sep 17 00:00:00 2001
From: Natalie <natalie@lilithuwu.com>
Date: Sun, 17 May 2026 05:22:23 -0700
Subject: [PATCH] =?UTF-8?q?feat(@projects/@magic-civilization):=20?=
 =?UTF-8?q?=E2=9C=A8=20add=20rl-train=20management=20script?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
---
 scripts/rl-train.sh | 129 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 129 insertions(+)
 create mode 100755 scripts/rl-train.sh

diff --git a/scripts/rl-train.sh b/scripts/rl-train.sh
new file mode 100755
index 00000000..85908988
--- /dev/null
+++ b/scripts/rl-train.sh
@@ -0,0 +1,129 @@
+#!/usr/bin/env bash
+# rl-train.sh — manage the RL self-play training run on apricot.
+#
+# Subcommands:
+#   status      Show trainer PID, elapsed, CPU/mem, last 40 log lines, eval dir listing
+#   logs [N]    Tail N (default 60) lines of the active training log
+#   gpu         Show nvidia-smi snapshot
+#   procs       Count godot/python procs related to training
+#   launch      Start training (env-driven; see ENV below)
+#   kill        Stop training cleanly (SIGTERM, then SIGKILL after 5s)
+#   sync        scp local tooling/rl_self_play/*.py to the worktree on RL_HOST
+#
+# Env vars (with defaults):
+#   RL_HOST=apricot               SSH alias for the training box
+#   RL_WORKTREE=/var/home/lilith/.cache/mc-rl-train-1779015795
+#   RL_RUN_NAME=duel-v1b-cuda1
+#   RL_DEVICE=cuda:1
+#   RL_ENVS=4
+#   RL_TOTAL_STEPS=200000
+#   RL_EVAL_FREQ=20000
+#   RL_EVAL_EPS=10
+#   RL_MAX_TURNS=100
+#   RL_PIDFILE=/tmp/rl-train.pid  (on the remote)
+
+set -euo pipefail
+
+: "${RL_HOST:=apricot}"
+: "${RL_WORKTREE:=/var/home/lilith/.cache/mc-rl-train-1779015795}"
+: "${RL_RUN_NAME:=duel-v1b-cuda1}"  # also names the remote log file and eval dir below
+: "${RL_DEVICE:=cuda:1}"
+: "${RL_ENVS:=4}"
+: "${RL_TOTAL_STEPS:=200000}"
+: "${RL_EVAL_FREQ:=20000}"
+: "${RL_EVAL_EPS:=10}"
+: "${RL_MAX_TURNS:=100}"
+: "${RL_PIDFILE:=/tmp/rl-train.pid}"  # ':=' assigns only when the variable is unset or empty
+
+LOG_REMOTE="${RL_WORKTREE}/training-${RL_RUN_NAME}.log"  # launch sends trainer stdout+stderr here
+EVAL_DIR_REMOTE="${RL_WORKTREE}/tooling/rl_self_play/runs/${RL_RUN_NAME}/eval"  # listed by status
+
+cmd="${1:-status}"  # subcommand; falls back to status when no argument is given
+shift || true  # shift fails with no arguments; tolerate that under set -e
+
+# Script travels on stdin, not argv, so remote pgrep/pkill -f never match our own shell.
+remote() { ssh "${RL_HOST}" 'bash -s' <<<"$1"; }
+case "$cmd" in
+  status)
+    # Show the trainer's ps line plus the last 40 lines of the file its stdout
+    # (fd 1) resolves to; the eval dir is listed whether or not it is running.
+    remote "
+      set +e
+      echo '---PYTHON PID---'
+      trainer=\$(pgrep -f 'python3 -m tooling.rl_self_play.train' | head -1)
+      if [ -n \"\$trainer\" ]; then
+        ps -p \$trainer -o pid,etime,pcpu,pmem,cmd
+        out=\$(readlink /proc/\$trainer/fd/1 2>/dev/null)
+        echo \"---LOG (\$out)---\"
+        tail -40 \"\$out\" 2>/dev/null || echo 'log not readable'
+      else
+        echo 'no training process'
+      fi
+      echo '---EVAL DIR (${EVAL_DIR_REMOTE})---'
+      ls -la ${EVAL_DIR_REMOTE} 2>/dev/null || echo 'missing'
+    "
+    ;;
+
+  logs)  # tail the file that the trainer process has open as stdout (fd 1)
+    count="${1:-60}"
+    remote "
+      trainer=\$(pgrep -f 'python3 -m tooling.rl_self_play.train' | head -1)
+      [ -n \"\$trainer\" ] || { echo 'no training process'; exit 1; }
+      tail -${count} \"\$(readlink /proc/\$trainer/fd/1)\"
+    "
+    ;;
+
+  gpu)  # CSV snapshot of the queried GPU fields (memory, utilization) via nvidia-smi
+    remote 'nvidia-smi --query-gpu=index,name,memory.used,memory.total,utilization.gpu --format=csv'
+    ;;
+
+  procs)  # count trainer and godot-bin command-line matches on RL_HOST, then show uptime
+    remote "
+      printf 'python train procs: '; pgrep -af 'python3 -m tooling.rl_self_play.train' | wc -l
+      printf 'godot-bin procs: ';   pgrep -af 'godot-bin' | wc -l
+      uptime
+    "
+    ;;
+
+  launch)  # spawn a detached trainer; exits 1 if one is running or none is alive after 3s
+    remote "
+      cd ${RL_WORKTREE} || exit 1
+      if pgrep -f 'python3 -m tooling.rl_self_play.train' >/dev/null; then
+        echo 'training already running; run kill first'
+        pgrep -af 'python3 -m tooling.rl_self_play.train'
+        exit 1
+      fi
+      nohup python3 -m tooling.rl_self_play.train \
+        --device ${RL_DEVICE} --num-envs ${RL_ENVS} --total-steps ${RL_TOTAL_STEPS} \
+        --eval-freq ${RL_EVAL_FREQ} --eval-episodes ${RL_EVAL_EPS} \
+        --max-turns ${RL_MAX_TURNS} --run-name ${RL_RUN_NAME} > ${LOG_REMOTE} 2>&1 &
+      echo \$! > ${RL_PIDFILE}
+      sleep 3
+      pgrep -af 'python3 -m tooling.rl_self_play.train' \
+        || { echo 'launch failed; check log'; tail -20 ${LOG_REMOTE}; exit 1; }
+    "
+    ;;
+
+  kill)  # SIGTERM matching trainers, wait 5s, SIGKILL trainers and godot-bin, print match count
+    remote "
+      pkill -f 'rl_self_play.train' 2>/dev/null || true
+      sleep 5
+      pkill -9 -f 'rl_self_play.train' 2>/dev/null || true
+      pkill -9 -f 'godot-bin --path' 2>/dev/null || true
+      printf 'remaining: '
+      pgrep -f 'rl_self_play.train|godot-bin' | wc -l
+    "
+    ;;
+
+  sync)  # copy the local trainer sources (*.py only) into the remote worktree
+    repo_root="$(cd "$(dirname "$0")/.." && pwd)"
+    dest="${RL_HOST}:${RL_WORKTREE}/tooling/rl_self_play/"
+    scp -q "${repo_root}/tooling/rl_self_play/"*.py "${dest}"
+    echo "synced tooling/rl_self_play/*.py to ${RL_HOST}:${RL_WORKTREE}"
+    ;;
+
+  *)  # unknown subcommand: print the leading comment block (after the shebang) as usage
+    awk 'NR == 1 { next } !/^#/ { exit } { print }' "$0"
+    exit 2
+    ;;
+esac