feat(tooling): ✨ add apricot gpu device guidance
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
7cdc8178b7
commit
de5fbd42c4
2 changed files with 47 additions and 0 deletions
|
|
@ -72,6 +72,26 @@ python -m tooling.rl_self_play.train --total-steps 1_000_000 --num-envs 4
|
|||
tensorboard --logdir tooling/rl_self_play/runs/
|
||||
```
|
||||
|
||||
### Apricot GPU layout
|
||||
|
||||
Apricot has 2× NVIDIA RTX 3090 (24 GB each). The typical division:
|
||||
|
||||
- `cuda:0` — model-boss inference / commit-message daemon (frequently busy).
|
||||
- `cuda:1` — free; use this for RL training to avoid contention.
|
||||
|
||||
```bash
|
||||
ssh apricot
|
||||
cd ~/Code/project-buildspace/magic-civilization # or wherever the canonical checkout lives
|
||||
pip install -r tooling/rl_self_play/requirements.txt # one-time
|
||||
python -m tooling.rl_self_play.train --device cuda:1 --num-envs 8 --total-steps 5_000_000
|
||||
```
|
||||
|
||||
`--device auto` is the safe default for a single-GPU box or a local Mac
|
||||
(`mps` on Apple Silicon). The MlpPolicy this scaffold uses fits in
|
||||
well under 1 GB VRAM, so the bottleneck is the harness CPU subprocesses
|
||||
rather than the GPU. Raise `--num-envs` (one harness per env) to keep
|
||||
the GPU fed.
|
||||
|
||||
For evaluation only (no training):
|
||||
|
||||
```bash
|
||||
|
|
|
|||
|
|
@ -68,6 +68,12 @@ def _build_argparser() -> argparse.ArgumentParser:
|
|||
help="Subdirectory under runs/ + models/ (default: duel-v1).")
|
||||
p.add_argument("--seed", type=int, default=42,
|
||||
help="Base RNG seed; per-env seeds offset from this (default: 42).")
|
||||
p.add_argument("--device", default="auto",
|
||||
help=("Torch device for the policy net: 'auto' (default — "
|
||||
"picks cuda if available, else cpu), 'cuda', "
|
||||
"'cuda:1' (second GPU), 'mps' (Apple Silicon), or "
|
||||
"'cpu'. On apricot, prefer 'cuda:1' so cuda:0 stays "
|
||||
"free for model-boss / MCTS rollouts."))
|
||||
return p
|
||||
|
||||
|
||||
|
|
@ -130,12 +136,33 @@ def main() -> int:
|
|||
render=False,
|
||||
)
|
||||
|
||||
# Resolve `--device` for logging clarity — sb3 accepts 'auto' but we
|
||||
# want to print exactly which device the rollouts will land on so a
|
||||
# multi-GPU box (apricot has 2× RTX 3090) can be confirmed at a glance.
|
||||
import torch # type: ignore[import-not-found]
|
||||
|
||||
if args.device == "auto":
|
||||
if torch.cuda.is_available():
|
||||
resolved_device = "cuda"
|
||||
elif getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
|
||||
resolved_device = "mps"
|
||||
else:
|
||||
resolved_device = "cpu"
|
||||
else:
|
||||
resolved_device = args.device
|
||||
print(
|
||||
f"policy device: {resolved_device} "
|
||||
f"(cuda_available={torch.cuda.is_available()}, "
|
||||
f"cuda_devices={torch.cuda.device_count() if torch.cuda.is_available() else 0})"
|
||||
)
|
||||
|
||||
model = MaskablePPO(
|
||||
"MlpPolicy",
|
||||
train_env,
|
||||
verbose=1,
|
||||
tensorboard_log=str(run_dir),
|
||||
seed=args.seed,
|
||||
device=resolved_device,
|
||||
n_steps=512,
|
||||
batch_size=128,
|
||||
learning_rate=3e-4,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue