chore(rl-train): 🔧 Update RL training script with new environment variables and execution flow for dynamic environment selection and training modes
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
0f24c80f1b
commit
595486b7b3
1 changed file with 40 additions and 12 deletions
|
|
@@ -86,28 +86,56 @@ case "$cmd" in
|
||||||
;;
|
;;
|
||||||
|
|
||||||
launch)
|
launch)
|
||||||
|
# Launch as a transient systemd --user .service under heavy-tests.slice.
|
||||||
|
# The slice (CPUWeight=20, MemoryMax=32G, TasksMax=4096) prevents the godot
|
||||||
|
# workers spawned by the python parent from starving sshd/interactive work.
|
||||||
|
# Every child process (flatpak, bwrap, godot-bin) inherits the cgroup, so a
|
||||||
|
# 3000-proc explosion stays contained — exactly the wedge mode seen on
|
||||||
|
# 2026-05-18 and 2026-05-19.
|
||||||
|
#
|
||||||
|
# Unit name includes epoch so re-launches with the same RL_RUN_NAME don't
|
||||||
|
# collide with a stopped-but-not-yet-collected unit.
|
||||||
|
RL_UNIT="rl-train-${RL_RUN_NAME}-$(date +%s)"
|
||||||
remote "
|
remote "
|
||||||
cd ${RL_WORKTREE} || exit 1
|
set -e
|
||||||
existing=\$(ps -eo pid,comm,args | awk '\$2 ~ /^python/ && /rl_self_play.train/ {print \$1}')
|
cd ${RL_WORKTREE}
|
||||||
|
existing=\$(systemctl --user list-units --type=service --no-legend --state=running 'rl-train-*' 2>/dev/null | awk '{print \$1}' | head -1)
|
||||||
if [ -n \"\$existing\" ]; then
|
if [ -n \"\$existing\" ]; then
|
||||||
echo 'training already running; run kill first'
|
echo \"training already running: \$existing — run 'kill' first\"
|
||||||
echo \"\$existing\"
|
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
nohup python3 -m tooling.rl_self_play.train \
|
systemd-run --user \\
|
||||||
--device ${RL_DEVICE} --num-envs ${RL_ENVS} \
|
--slice=heavy-tests.slice \\
|
||||||
--total-steps ${RL_TOTAL_STEPS} --eval-freq ${RL_EVAL_FREQ} \
|
--unit=${RL_UNIT} \\
|
||||||
--eval-episodes ${RL_EVAL_EPS} --max-turns ${RL_MAX_TURNS} \
|
--collect --quiet \\
|
||||||
--run-name ${RL_RUN_NAME} > ${LOG_REMOTE} 2>&1 &
|
--working-directory=${RL_WORKTREE} \\
|
||||||
echo \$! > ${RL_PIDFILE}
|
--setenv=PYTHONUNBUFFERED=1 \\
|
||||||
|
--property=StandardOutput=append:${LOG_REMOTE} \\
|
||||||
|
--property=StandardError=append:${LOG_REMOTE} \\
|
||||||
|
-- python3 -m tooling.rl_self_play.train \\
|
||||||
|
--device ${RL_DEVICE} --num-envs ${RL_ENVS} \\
|
||||||
|
--total-steps ${RL_TOTAL_STEPS} --eval-freq ${RL_EVAL_FREQ} \\
|
||||||
|
--eval-episodes ${RL_EVAL_EPS} --max-turns ${RL_MAX_TURNS} \\
|
||||||
|
--run-name ${RL_RUN_NAME}
|
||||||
|
echo ${RL_UNIT} > ${RL_PIDFILE}
|
||||||
sleep 3
|
sleep 3
|
||||||
ps -eo pid,comm,args | awk '\$2 ~ /^python/ && /rl_self_play.train/' \\
|
systemctl --user status ${RL_UNIT} --no-pager --lines=0
|
||||||
|| (echo 'launch failed; check log'; tail -20 ${LOG_REMOTE})
|
systemctl --user show ${RL_UNIT} --property=MainPID
|
||||||
"
|
"
|
||||||
;;
|
;;
|
||||||
|
|
||||||
kill)
|
kill)
|
||||||
|
# Stop all rl-train-* transient services. systemd cascades SIGTERM through
|
||||||
|
# the cgroup, then SIGKILL after TimeoutStopSec, reaping all godot children.
|
||||||
|
# Falls back to pkill for any procs not in a unit (legacy runs / orphans).
|
||||||
remote "
|
remote "
|
||||||
|
units=\$(systemctl --user list-units --type=service --no-legend 'rl-train-*' 2>/dev/null | awk '{print \$1}')
|
||||||
|
if [ -n \"\$units\" ]; then
|
||||||
|
echo \"stopping units:\"
|
||||||
|
echo \"\$units\"
|
||||||
|
echo \"\$units\" | xargs -r systemctl --user stop
|
||||||
|
fi
|
||||||
|
# Legacy / out-of-unit sweep
|
||||||
pkill -f 'rl_self_play.train' 2>/dev/null || true
|
pkill -f 'rl_self_play.train' 2>/dev/null || true
|
||||||
sleep 5
|
sleep 5
|
||||||
pkill -9 -f 'rl_self_play.train' 2>/dev/null || true
|
pkill -9 -f 'rl_self_play.train' 2>/dev/null || true
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue