feat(tooling): add apricot gpu device guidance

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
Natalie 2026-05-17 04:02:09 -07:00
parent 7cdc8178b7
commit de5fbd42c4
2 changed files with 47 additions and 0 deletions

View file

@ -72,6 +72,26 @@ python -m tooling.rl_self_play.train --total-steps 1_000_000 --num-envs 4
tensorboard --logdir tooling/rl_self_play/runs/
```
### Apricot GPU layout
Apricot has 2× NVIDIA RTX 3090 (24 GB each). The typical division:
- `cuda:0` — model-boss inference / commit-message daemon (frequently busy).
- `cuda:1` — free; use this for RL training to avoid contention.
```bash
ssh apricot
cd ~/Code/project-buildspace/magic-civilization # or wherever the canonical checkout lives
pip install -r tooling/rl_self_play/requirements.txt # one-time
python -m tooling.rl_self_play.train --device cuda:1 --num-envs 8 --total-steps 5_000_000
```
`--device auto` is the safe default for a single-GPU box or a local Mac
(where it resolves to `mps` on Apple Silicon). The MlpPolicy this scaffold
uses fits in well under 1 GB of VRAM, so the bottleneck is the harness CPU
subprocesses rather than the GPU. Raise `--num-envs` (one harness each) to
keep the GPU fed.
For evaluation only (no training):
```bash

View file

@ -68,6 +68,12 @@ def _build_argparser() -> argparse.ArgumentParser:
help="Subdirectory under runs/ + models/ (default: duel-v1).")
p.add_argument("--seed", type=int, default=42,
help="Base RNG seed; per-env seeds offset from this (default: 42).")
p.add_argument("--device", default="auto",
help=("Torch device for the policy net: 'auto' (default — "
"picks cuda if available, else cpu), 'cuda', "
"'cuda:1' (second GPU), 'mps' (Apple Silicon), or "
"'cpu'. On apricot, prefer 'cuda:1' so cuda:0 stays "
"free for model-boss / MCTS rollouts."))
return p
@ -130,12 +136,33 @@ def main() -> int:
render=False,
)
# Resolve `--device` up front — sb3 accepts 'auto' but we want to print
# exactly which device the policy net will land on so a multi-GPU box
# (apricot has 2× RTX 3090) can be confirmed at a glance. The resolved
# value (not the raw flag) is also what gets passed to MaskablePPO below.
import torch # type: ignore[import-not-found]
if args.device == "auto":
if torch.cuda.is_available():
resolved_device = "cuda"
elif getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
resolved_device = "mps"
else:
resolved_device = "cpu"
else:
resolved_device = args.device
print(
f"policy device: {resolved_device} "
f"(cuda_available={torch.cuda.is_available()}, "
f"cuda_devices={torch.cuda.device_count() if torch.cuda.is_available() else 0})"
)
model = MaskablePPO(
"MlpPolicy",
train_env,
verbose=1,
tensorboard_log=str(run_dir),
seed=args.seed,
device=resolved_device,
n_steps=512,
batch_size=128,
learning_rate=3e-4,