feat(tooling): ✨ add smoke test for protocol layer
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
b7891991a4
commit
7cdc8178b7
3 changed files with 171 additions and 6 deletions
|
|
@ -33,6 +33,7 @@ the right shape.
|
|||
| `magic_civ_env.py` | `gymnasium.Env` subclass exposing the harness as one episode = one game. Implements `action_masks()` for MaskablePPO. |
|
||||
| `train.py` | CLI entry. Builds K parallel envs (each its own harness), runs MaskablePPO, periodically evaluates against the same baseline, saves best model. |
|
||||
| `evaluate.py` | Standalone eval — load a saved model, run N games, print `{episodes, wins, losses, draws, win_rate, mean_turns}` JSON. |
|
||||
| `smoke.py` | Lightweight CI gate that needs only NumPy. Drives the harness + encoders through a random-policy loop without importing `gymnasium`/`sb3`/`torch`. Prints a one-line JSON verdict and exits 0 when `passed` is `true`, non-zero otherwise. Run before any training session to confirm the protocol layer is intact. |
|
||||
| `requirements.txt` | Pinned versions; `pip install -r requirements.txt` is the one-time setup. |
|
||||
|
||||
## Methodology
|
||||
|
|
@ -53,8 +54,18 @@ the right shape.
|
|||
|
||||
## Run it
|
||||
|
||||
Smoke test the protocol layer first (only NumPy is required — none of the RL dependencies):
|
||||
|
||||
```bash
|
||||
cd /path/to/magic-civilization  # repository root, e.g. /Users/natalie/Code/@projects/@magic-civilization
|
||||
python3 -m tooling.rl_self_play.smoke --turns 30
|
||||
# → {"steps": 332, "turns_reached": 30, "mask_violations": 0,
#    "eliminations": 0, "harness_errors": 0, "obs_dim": 32,
#    "action_dim": 322, "episodes": 1, "passed": true}
|
||||
```
|
||||
|
||||
Then install RL deps and train:
|
||||
|
||||
```bash
|
||||
pip install -r tooling/rl_self_play/requirements.txt
|
||||
python -m tooling.rl_self_play.train --total-steps 1_000_000 --num-envs 4
|
||||
# In a second terminal:
|
||||
|
|
|
|||
|
|
@ -1,9 +1,10 @@
|
|||
# Pinned to versions that are known to compose cleanly with sb3-contrib's
|
||||
# MaskablePPO as of 2026-Q2. Bump together — sb3 and sb3-contrib track in
|
||||
# lockstep, and torch's wheel ABI changes between minor versions.
|
||||
# Pinned to versions known to compose cleanly with sb3-contrib's
|
||||
# MaskablePPO and Python 3.12+ as of 2026-Q2. Bump together — sb3 and
|
||||
# sb3-contrib track in lockstep; torch's wheel ABI changes between
|
||||
# minor versions. torch 2.5+ is required for Python 3.13 support.
|
||||
gymnasium==1.2.1
|
||||
stable-baselines3==2.7.0
|
||||
sb3-contrib==2.7.0
|
||||
torch==2.4.1
|
||||
stable-baselines3>=2.8.0,<2.10
|
||||
sb3-contrib>=2.8.0,<2.10
|
||||
torch>=2.5.0,<2.7
|
||||
numpy>=2.0,<3
|
||||
tensorboard>=2.18
|
||||
|
|
|
|||
153
tooling/rl_self_play/smoke.py
Normal file
153
tooling/rl_self_play/smoke.py
Normal file
|
|
@ -0,0 +1,153 @@
|
|||
"""Stdlib-only smoke test for the harness + encoder layer.
|
||||
|
||||
Verifies — without needing `gymnasium`, `stable-baselines3`, or `torch`
(NumPy is the only third-party import this module requires) —
that the protocol shim works end-to-end:
|
||||
|
||||
1. `HarnessClient` spawns the Godot subprocess and returns a valid
|
||||
`view` JSON on first request.
|
||||
2. `encode_observation` projects every view into a fixed-shape
|
||||
`np.float32[OBS_DIM]` without raising.
|
||||
3. `encode_legal_actions` produces a boolean mask whose `True`
|
||||
positions all map back to a legal `PlayerAction` via
|
||||
`decode_action_index`.
|
||||
4. A random-policy loop bounded by `--turns` reaches the turn limit
|
||||
OR terminates cleanly without raising `HarnessError`.
|
||||
|
||||
Run:
|
||||
python3 -m tooling.rl_self_play.smoke [--turns 30] [--seed 42]
|
||||
|
||||
Output is one-line JSON like:
|
||||
|
||||
{"steps": 87, "turns_reached": 30, "mask_violations": 0,
"eliminations": 0, "harness_errors": 0, "obs_dim": 32,
"action_dim": 322, "episodes": 1, "passed": true}
|
||||
|
||||
Exit 0 on `passed: true`; non-zero otherwise. Suitable as a CI gate
|
||||
before any real training run.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
|
||||
# Directory containing this file, and the repository root two levels above it
# (this file lives at <root>/tooling/rl_self_play/smoke.py).
THIS_DIR = Path(__file__).resolve().parent
PROJECT_ROOT = THIS_DIR.parents[1]

# The absolute `tooling.rl_self_play.*` imports below only resolve when the
# repository root is importable. `__package__` is None when the file is run by
# path and "" when it is loaded as a top-level module; in both cases we are
# not inside the `tooling.rl_self_play` package, so put the root on sys.path.
if not __package__:
    sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
from tooling.rl_self_play.encoders import ( # noqa: E402
|
||||
ACTION_DIM,
|
||||
OBS_DIM,
|
||||
decode_action_index,
|
||||
encode_legal_actions,
|
||||
encode_observation,
|
||||
)
|
||||
from tooling.rl_self_play.harness_client import ( # noqa: E402
|
||||
HarnessClient,
|
||||
HarnessConfig,
|
||||
HarnessError,
|
||||
)
|
||||
|
||||
|
||||
def _positive_int(text: str) -> int:
    """Parse *text* as an integer >= 1 for use as an argparse ``type=``.

    Raises:
        argparse.ArgumentTypeError: if *text* is not an integer or is < 1,
            which argparse turns into a usage error (exit status 2).
    """
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"expected an integer, got {text!r}"
        ) from None
    if value < 1:
        raise argparse.ArgumentTypeError(f"must be >= 1, got {value}")
    return value


def _build_argparser() -> argparse.ArgumentParser:
    """Build the command-line parser for the smoke test.

    Options and defaults: ``--turns`` (30), ``--episodes`` (1), ``--seed``
    (42), ``--players`` (2) and ``--map-size`` ("duel").

    ``--turns`` and ``--episodes`` must be at least 1: with zero turns or
    zero episodes nothing is exercised, yet the verdict would still report
    ``passed: true``, which defeats the purpose of a CI gate.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    p = argparse.ArgumentParser(description="Smoke-test the harness + encoder layer")
    p.add_argument(
        "--turns", type=_positive_int, default=30, help="Max turns per episode"
    )
    p.add_argument(
        "--episodes", type=_positive_int, default=1, help="Episodes to run"
    )
    p.add_argument("--seed", type=int, default=42, help="Base RNG seed")
    p.add_argument(
        "--players", type=int, default=2, help="Player count passed to the harness"
    )
    p.add_argument(
        "--map-size", default="duel", help="Map size name passed to the harness"
    )
    return p
|
||||
|
||||
|
||||
def _is_eliminated(view) -> bool:
    """Return True when *view* shows no cities and no living founder unit.

    Reads ``score.city_count``, ``units`` and ``player`` from *view* via
    ``.get`` (so *view* is presumably the dict decoded from the harness JSON
    — confirm against ``HarnessClient.view``). A unit counts as a living
    founder when its ``owner`` equals the viewing player, its ``type``
    contains "founder", and its ``hp`` is above zero.
    """
    if int(view.get("score", {}).get("city_count", 0)) != 0:
        return False
    roster = view.get("units", [])
    me = int(view.get("player", 0))
    for unit in roster:
        if int(unit.get("owner", -1)) != me:
            continue
        if "founder" not in str(unit.get("type", "")):
            continue
        if float(unit.get("hp", 0)) > 0:
            return False
    return True


def _run_episode(
    client: HarnessClient, rng: np.random.Generator, max_turns: int
) -> dict[str, int]:
    """Drive one episode with a uniformly random legal-action policy.

    Each iteration encodes the current view, samples one index from the
    legal-action mask, decodes it, and sends it to the harness
    (``client.end_turn()`` for an "end_turn" action, ``client.act(action)``
    otherwise). The loop stops when the view's ``turn`` reaches
    *max_turns*, when a violation is detected, or when the player is
    eliminated.

    Args:
        client: Harness connection used to read views and submit actions.
        rng: Random generator used to pick among the legal action indices.
        max_turns: The loop continues only while the last observed turn is
            below this value.

    Returns:
        A dict with ``steps`` (actions submitted), ``turns_reached`` (last
        ``turn`` value read from a view, 0 if none), ``mask_violations``
        (wrong observation shape or an empty legal-action mask) and
        ``eliminations`` (1 if the episode ended by elimination).
    """
    tally = {
        "steps": 0,
        "turns_reached": 0,
        "mask_violations": 0,
        "eliminations": 0,
    }
    view = client.view()
    while tally["turns_reached"] < max_turns:
        observation = encode_observation(view)
        if observation.shape != (OBS_DIM,):
            # A mis-shaped observation is reported under the same counter as
            # mask problems; either one fails the verdict.
            tally["mask_violations"] += 1
            break

        mask, idx_to_action = encode_legal_actions(view)
        candidates = np.nonzero(mask)[0]
        if candidates.size == 0:
            # No legal action at all: the policy cannot move.
            tally["mask_violations"] += 1
            break

        chosen = int(rng.choice(candidates))
        action = decode_action_index(chosen, idx_to_action)
        if action.get("type") != "end_turn":
            client.act(action)
        else:
            client.end_turn()

        view = client.view()
        tally["turns_reached"] = int(view.get("turn", 0))
        tally["steps"] += 1

        if _is_eliminated(view):
            tally["eliminations"] += 1
            break
    return tally
|
||||
|
||||
|
||||
def main() -> int:
    """Run the requested smoke episodes and print a one-line JSON verdict.

    One ``HarnessClient`` is created per episode, seeded with
    ``--seed + episode index``; a single NumPy generator seeded with
    ``--seed`` drives the random policy across all episodes.

    The verdict printed to stdout contains the summed ``steps``,
    ``mask_violations`` and ``eliminations``, the maximum ``turns_reached``
    over all episodes, ``harness_errors``, plus ``obs_dim``, ``action_dim``,
    ``episodes`` and ``passed``. ``passed`` is true only when there were no
    mask violations and no harness errors.

    Returns:
        0 when the verdict passed, 1 otherwise (used as the exit status).
    """
    args = _build_argparser().parse_args()
    rng = np.random.default_rng(args.seed)
    totals = {
        "steps": 0,
        "turns_reached": 0,
        "mask_violations": 0,
        "eliminations": 0,
        "harness_errors": 0,
    }
    for episode in range(args.episodes):
        cfg = HarnessConfig(
            seed=args.seed + episode,
            players=args.players,
            player_slot=0,
            map_size=args.map_size,
        )
        # The `with` statement sits inside the `try` so that a HarnessError
        # raised while starting or shutting down the client is counted and
        # reported in the JSON verdict instead of escaping as a traceback.
        try:
            with HarnessClient(cfg) as client:
                result = _run_episode(client, rng, args.turns)
        except HarnessError as exc:
            # Diagnostics go to stderr so stdout stays a single JSON line.
            print(f"smoke: episode {episode} harness error: {exc!r}", file=sys.stderr)
            totals["harness_errors"] += 1
            continue
        totals["steps"] += result["steps"]
        totals["turns_reached"] = max(totals["turns_reached"], result["turns_reached"])
        totals["mask_violations"] += result["mask_violations"]
        totals["eliminations"] += result["eliminations"]
    verdict = {
        **totals,
        "obs_dim": OBS_DIM,
        "action_dim": ACTION_DIM,
        "episodes": args.episodes,
        "passed": totals["mask_violations"] == 0 and totals["harness_errors"] == 0,
    }
    print(json.dumps(verdict))
    return 0 if verdict["passed"] else 1
|
||||
|
||||
|
||||
# Script entry point: the process exit status is whatever main() returns
# (0 when the smoke test passed, 1 otherwise).
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Loading…
Add table
Reference in a new issue