diff --git a/scripts/player-api-server.sh b/scripts/player-api-server.sh index 36d5c8aa..17c0984b 100755 --- a/scripts/player-api-server.sh +++ b/scripts/player-api-server.sh @@ -42,7 +42,8 @@ PROJECT_DIR="$(dirname "$SCRIPT_DIR")" # flatpak runtime just for this harness is silly when native Godot 4 # works directly. Env-var passthrough is automatic for the native path. export CP_SEED CP_PLAYERS CP_PLAYER_SLOT CP_MAP_SIZE CP_MAP_TYPE \ - CP_OMNISCIENT CP_TIMEOUT_SEC CP_LOG_FILE CP_VICTORY_MODE + CP_OMNISCIENT CP_TIMEOUT_SEC CP_LOG_FILE CP_VICTORY_MODE \ + CP_PLAYER_CONTROLLERS CP_PLAYER_SLOTS case "$(uname -s)" in Darwin) @@ -68,6 +69,8 @@ case "$(uname -s)" in --env=CP_TIMEOUT_SEC="$CP_TIMEOUT_SEC" \ --env=CP_LOG_FILE="$CP_LOG_FILE" \ --env=CP_VICTORY_MODE="${CP_VICTORY_MODE:-}" \ + --env=CP_PLAYER_CONTROLLERS="${CP_PLAYER_CONTROLLERS:-}" \ + --env=CP_PLAYER_SLOTS="${CP_PLAYER_SLOTS:-}" \ org.godotengine.Godot \ --path "$PROJECT_DIR/src/game" \ --headless \ diff --git a/tooling/rl_self_play/harness_client.py b/tooling/rl_self_play/harness_client.py index c12fcfa2..8bf3146b 100644 --- a/tooling/rl_self_play/harness_client.py +++ b/tooling/rl_self_play/harness_client.py @@ -42,6 +42,14 @@ class HarnessConfig: seed: int = 42 players: int = 2 player_slot: int = 0 + # Stage 4 (multi-slot adapter) — externally-driven slots when this + # process is driving more than one slot (e.g. 5 learned slots in a + # 5v5 FFA). When this tuple has >1 entry, every wire `view` / `act` + # call MUST include a `slot` field naming which slot it targets; + # `HarnessClient.view`/`act`/`end_turn` accept an optional `slot` + # kwarg for this. Empty (the default) means `(player_slot,)`; see + # `effective_player_slots`. Single-slot callers' wire shape is unchanged. + player_slots: tuple[int, ...] = () map_size: str = "duel" map_type: str = "continents" omniscient: bool = False @@ -53,18 +61,34 @@ class HarnessConfig: # simulator falls back to a city-count check that almost never fires # in 1v1 duel play. 
victory_mode: str = "domination" + # Stage 4 — per-AI-slot controller registry id, comma-joined in slot + # order over AI slots (i.e. excluding `player_slot`). Empty = every + # AI slot defaults to `"scripted:default"` (the MCTS+heuristic). + # Set this to mix learned + scripted opponents in one game, e.g. + # `("learned:duel-v1b", "", "")` puts learned on the first AI slot. + player_controllers: tuple[str, ...] = () + + @property + def effective_player_slots(self) -> tuple[int, ...]: + """Resolve the back-compat fallback: empty tuple → `(player_slot,)`.""" + return self.player_slots if self.player_slots else (self.player_slot,) def to_env(self) -> dict[str, str]: - return { + slots = self.effective_player_slots + env: dict[str, str] = { "CP_SEED": str(self.seed), "CP_PLAYERS": str(self.players), - "CP_PLAYER_SLOT": str(self.player_slot), + "CP_PLAYER_SLOT": str(slots[0]), + "CP_PLAYER_SLOTS": ",".join(str(s) for s in slots), "CP_MAP_SIZE": self.map_size, "CP_MAP_TYPE": self.map_type, "CP_OMNISCIENT": "1" if self.omniscient else "0", "CP_TIMEOUT_SEC": str(self.timeout_sec), "CP_VICTORY_MODE": self.victory_mode, } + if self.player_controllers: + env["CP_PLAYER_CONTROLLERS"] = ",".join(self.player_controllers) + return env class HarnessClient: @@ -128,24 +152,32 @@ class HarnessClient: f"no correlated response for id={msg['id']} within {MAX_LINES_PER_RESPONSE} lines" ) - def view(self) -> dict[str, Any]: - r = self._send({"type": "view"}) + def view(self, slot: int | None = None) -> dict[str, Any]: + msg: dict[str, Any] = {"type": "view"} + if slot is not None: + msg["slot"] = slot + r = self._send(msg) if not r.get("ok"): raise HarnessError(f"view failed: {r.get('error')}") return r["view"] - def act(self, action: dict[str, Any]) -> dict[str, Any]: - r = self._send({"type": "act", "action": action}) + def act( + self, action: dict[str, Any], slot: int | None = None + ) -> dict[str, Any]: + msg: dict[str, Any] = {"type": "act", "action": action} + if slot is not 
None: + msg["slot"] = slot + r = self._send(msg) if not r.get("ok"): err = r.get("error", {}) raise HarnessError( - f"act({action.get('type')!r}) failed: " + f"act({action.get('type')!r}, slot={slot}) failed: " f"{err.get('code')}: {err.get('message')}" ) return r - def end_turn(self) -> dict[str, Any]: - return self.act({"type": "end_turn"}) + def end_turn(self, slot: int | None = None) -> dict[str, Any]: + return self.act({"type": "end_turn"}, slot=slot) def drain_notifications(self) -> list[dict[str, Any]]: """Pop and return all async notifications that arrived since the