feat(@projects/@magic-civilization): ✨ add rl-self-play harness and Claude player integration
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
82c464e31e
commit
ad108810dd
11 changed files with 440 additions and 46 deletions
|
|
@ -15,7 +15,7 @@
|
||||||
],
|
],
|
||||||
"env": {
|
"env": {
|
||||||
"CP_PLAYERS": "2",
|
"CP_PLAYERS": "2",
|
||||||
"CP_CLAUDE_SLOT": "0",
|
"CP_PLAYER_SLOT": "0",
|
||||||
"CP_MAP_SIZE": "duel",
|
"CP_MAP_SIZE": "duel",
|
||||||
"CP_MAP_TYPE": "continents"
|
"CP_MAP_TYPE": "continents"
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,7 @@
|
||||||
# CP_TURNS (default 25) — number of EndTurns to issue
|
# CP_TURNS (default 25) — number of EndTurns to issue
|
||||||
# CP_SEED (default 42)
|
# CP_SEED (default 42)
|
||||||
# CP_PLAYERS (default 3)
|
# CP_PLAYERS (default 3)
|
||||||
# CP_CLAUDE_SLOT (default 0)
|
# CP_PLAYER_SLOT (default 0)
|
||||||
# CP_MAP_SIZE (default duel)
|
# CP_MAP_SIZE (default duel)
|
||||||
# CP_TIMEOUT_SEC (default 600) — harness wallclock budget
|
# CP_TIMEOUT_SEC (default 600) — harness wallclock budget
|
||||||
#
|
#
|
||||||
|
|
@ -27,11 +27,11 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
: "${CP_TURNS:=25}"
|
: "${CP_TURNS:=25}"
|
||||||
: "${CP_SEED:=42}"
|
: "${CP_SEED:=42}"
|
||||||
: "${CP_PLAYERS:=3}"
|
: "${CP_PLAYERS:=3}"
|
||||||
: "${CP_CLAUDE_SLOT:=0}"
|
: "${CP_PLAYER_SLOT:=0}"
|
||||||
: "${CP_MAP_SIZE:=duel}"
|
: "${CP_MAP_SIZE:=duel}"
|
||||||
: "${CP_TIMEOUT_SEC:=600}"
|
: "${CP_TIMEOUT_SEC:=600}"
|
||||||
|
|
||||||
export CP_SEED CP_PLAYERS CP_CLAUDE_SLOT CP_MAP_SIZE CP_TIMEOUT_SEC
|
export CP_SEED CP_PLAYERS CP_PLAYER_SLOT CP_MAP_SIZE CP_TIMEOUT_SEC
|
||||||
|
|
||||||
TMP=$(mktemp -d -t mc-demo25-XXXXXX)
|
TMP=$(mktemp -d -t mc-demo25-XXXXXX)
|
||||||
trap "rm -rf '$TMP'" EXIT
|
trap "rm -rf '$TMP'" EXIT
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@
|
||||||
# Passing requires `actions_applied > 0` on every turn 1..5 for every AI slot.
|
# Passing requires `actions_applied > 0` on every turn 1..5 for every AI slot.
|
||||||
# Exits 0 on pass, 1 on fail.
|
# Exits 0 on pass, 1 on fail.
|
||||||
#
|
#
|
||||||
# Env: CP_SEED, CP_PLAYERS (default 3), CP_CLAUDE_SLOT (default 0), CP_MAP_SIZE.
|
# Env: CP_SEED, CP_PLAYERS (default 3), CP_PLAYER_SLOT (default 0), CP_MAP_SIZE.
|
||||||
|
|
||||||
set -uo pipefail
|
set -uo pipefail
|
||||||
|
|
||||||
|
|
@ -18,12 +18,12 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
|
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
|
||||||
|
|
||||||
: "${CP_PLAYERS:=3}"
|
: "${CP_PLAYERS:=3}"
|
||||||
: "${CP_CLAUDE_SLOT:=0}"
|
: "${CP_PLAYER_SLOT:=0}"
|
||||||
: "${CP_SEED:=42}"
|
: "${CP_SEED:=42}"
|
||||||
: "${CP_MAP_SIZE:=duel}"
|
: "${CP_MAP_SIZE:=duel}"
|
||||||
: "${SMOKE_TURNS:=5}"
|
: "${SMOKE_TURNS:=5}"
|
||||||
|
|
||||||
export CP_PLAYERS CP_CLAUDE_SLOT CP_SEED CP_MAP_SIZE
|
export CP_PLAYERS CP_PLAYER_SLOT CP_SEED CP_MAP_SIZE
|
||||||
|
|
||||||
TMP=$(mktemp -d -t mc-smoke-XXXXXX)
|
TMP=$(mktemp -d -t mc-smoke-XXXXXX)
|
||||||
trap "rm -rf '$TMP'" EXIT
|
trap "rm -rf '$TMP'" EXIT
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@
|
||||||
# Env vars (forwarded into the sandbox):
|
# Env vars (forwarded into the sandbox):
|
||||||
# CP_SEED (default 42)
|
# CP_SEED (default 42)
|
||||||
# CP_PLAYERS (default 3)
|
# CP_PLAYERS (default 3)
|
||||||
# CP_CLAUDE_SLOT (default 0)
|
# CP_PLAYER_SLOT (default 0)
|
||||||
# CP_MAP_SIZE (default duel)
|
# CP_MAP_SIZE (default duel)
|
||||||
# CP_TURNS (default 25)
|
# CP_TURNS (default 25)
|
||||||
# CP_SCREENSHOT_EVERY (default 1)
|
# CP_SCREENSHOT_EVERY (default 1)
|
||||||
|
|
@ -31,7 +31,7 @@ PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
|
||||||
|
|
||||||
: "${CP_SEED:=42}"
|
: "${CP_SEED:=42}"
|
||||||
: "${CP_PLAYERS:=3}"
|
: "${CP_PLAYERS:=3}"
|
||||||
: "${CP_CLAUDE_SLOT:=0}"
|
: "${CP_PLAYER_SLOT:=0}"
|
||||||
: "${CP_MAP_SIZE:=duel}"
|
: "${CP_MAP_SIZE:=duel}"
|
||||||
: "${CP_TURNS:=25}"
|
: "${CP_TURNS:=25}"
|
||||||
: "${CP_SCREENSHOT_EVERY:=1}"
|
: "${CP_SCREENSHOT_EVERY:=1}"
|
||||||
|
|
@ -81,7 +81,7 @@ timeout "$CP_TIMEOUT_SEC" flatpak run --user \
|
||||||
--filesystem=xdg-run/${WESTON_SOCKET} \
|
--filesystem=xdg-run/${WESTON_SOCKET} \
|
||||||
--env=CP_SEED="$CP_SEED" \
|
--env=CP_SEED="$CP_SEED" \
|
||||||
--env=CP_PLAYERS="$CP_PLAYERS" \
|
--env=CP_PLAYERS="$CP_PLAYERS" \
|
||||||
--env=CP_CLAUDE_SLOT="$CP_CLAUDE_SLOT" \
|
--env=CP_PLAYER_SLOT="$CP_PLAYER_SLOT" \
|
||||||
--env=CP_MAP_SIZE="$CP_MAP_SIZE" \
|
--env=CP_MAP_SIZE="$CP_MAP_SIZE" \
|
||||||
--env=CP_TURNS="$CP_TURNS" \
|
--env=CP_TURNS="$CP_TURNS" \
|
||||||
--env=CP_SCREENSHOT_EVERY="$CP_SCREENSHOT_EVERY" \
|
--env=CP_SCREENSHOT_EVERY="$CP_SCREENSHOT_EVERY" \
|
||||||
|
|
|
||||||
|
|
@ -9,10 +9,10 @@
|
||||||
# Python `subprocess.Popen`, a smoke-test shell script, etc.
|
# Python `subprocess.Popen`, a smoke-test shell script, etc.
|
||||||
#
|
#
|
||||||
# Env vars (see PLAYER_API.md for the full schema):
|
# Env vars (see PLAYER_API.md for the full schema):
|
||||||
# CP_SEED, CP_PLAYERS, CP_CLAUDE_SLOT, CP_MAP_SIZE, CP_MAP_TYPE,
|
# CP_SEED, CP_PLAYERS, CP_PLAYER_SLOT, CP_MAP_SIZE, CP_MAP_TYPE,
|
||||||
# CP_OMNISCIENT, CP_TIMEOUT_SEC, CP_LOG_FILE.
|
# CP_OMNISCIENT, CP_TIMEOUT_SEC, CP_LOG_FILE.
|
||||||
#
|
#
|
||||||
# `CP_CLAUDE_SLOT` is the env-var name the harness has used since p2-67;
|
# `CP_PLAYER_SLOT` (named `CP_CLAUDE_SLOT` from p2-67 until this rename)
|
||||||
# it identifies the externally-controlled player slot — kept as-is for
|
# identifies the externally-controlled player slot. The old name is no
|
||||||
# backward compatibility with existing clients. Despite the name it is
|
# longer read here, so existing clients must export the new one. The slot is
|
||||||
# not Claude-specific.
|
# not Claude-specific.
|
||||||
|
|
@ -28,7 +28,7 @@ PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
|
||||||
# Defaults — adapter overrides via env.
|
# Defaults — adapter overrides via env.
|
||||||
: "${CP_SEED:=42}"
|
: "${CP_SEED:=42}"
|
||||||
: "${CP_PLAYERS:=2}"
|
: "${CP_PLAYERS:=2}"
|
||||||
: "${CP_CLAUDE_SLOT:=0}"
|
: "${CP_PLAYER_SLOT:=0}"
|
||||||
: "${CP_MAP_SIZE:=duel}"
|
: "${CP_MAP_SIZE:=duel}"
|
||||||
: "${CP_MAP_TYPE:=continents}"
|
: "${CP_MAP_TYPE:=continents}"
|
||||||
: "${CP_OMNISCIENT:=0}"
|
: "${CP_OMNISCIENT:=0}"
|
||||||
|
|
@ -41,7 +41,7 @@ PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
|
||||||
# macOS uses the locally-installed `godot` binary (Homebrew); a parallel
|
# macOS uses the locally-installed `godot` binary (Homebrew); a parallel
|
||||||
# flatpak runtime just for this harness is silly when native Godot 4
|
# flatpak runtime just for this harness is silly when native Godot 4
|
||||||
# works directly. Env-var passthrough is automatic for the native path.
|
# works directly. Env-var passthrough is automatic for the native path.
|
||||||
export CP_SEED CP_PLAYERS CP_CLAUDE_SLOT CP_MAP_SIZE CP_MAP_TYPE \
|
export CP_SEED CP_PLAYERS CP_PLAYER_SLOT CP_MAP_SIZE CP_MAP_TYPE \
|
||||||
CP_OMNISCIENT CP_TIMEOUT_SEC CP_LOG_FILE
|
CP_OMNISCIENT CP_TIMEOUT_SEC CP_LOG_FILE
|
||||||
|
|
||||||
case "$(uname -s)" in
|
case "$(uname -s)" in
|
||||||
|
|
@ -61,7 +61,7 @@ case "$(uname -s)" in
|
||||||
exec flatpak run --user \
|
exec flatpak run --user \
|
||||||
--env=CP_SEED="$CP_SEED" \
|
--env=CP_SEED="$CP_SEED" \
|
||||||
--env=CP_PLAYERS="$CP_PLAYERS" \
|
--env=CP_PLAYERS="$CP_PLAYERS" \
|
||||||
--env=CP_CLAUDE_SLOT="$CP_CLAUDE_SLOT" \
|
--env=CP_PLAYER_SLOT="$CP_PLAYER_SLOT" \
|
||||||
--env=CP_MAP_SIZE="$CP_MAP_SIZE" \
|
--env=CP_MAP_SIZE="$CP_MAP_SIZE" \
|
||||||
--env=CP_MAP_TYPE="$CP_MAP_TYPE" \
|
--env=CP_MAP_TYPE="$CP_MAP_TYPE" \
|
||||||
--env=CP_OMNISCIENT="$CP_OMNISCIENT" \
|
--env=CP_OMNISCIENT="$CP_OMNISCIENT" \
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
extends Node
|
extends Node
|
||||||
## Headless harness for the generic external-player JSON-Lines API.
|
## Headless harness for the external-player JSON-Lines API.
|
||||||
##
|
##
|
||||||
## Boots a seeded GameState, instantiates a `GdPlayerApi`, then enters
|
## Boots a seeded GameState, instantiates a `GdPlayerApi`, then enters
|
||||||
## a JSON-Lines pump on stdin/stdout. Each line in is one `Request`
|
## a JSON-Lines pump on stdin/stdout. Each line in is one `Request`
|
||||||
|
|
@ -7,16 +7,14 @@ extends Node
|
||||||
## one `Notification`. The protocol contract lives in
|
## one `Notification`. The protocol contract lives in
|
||||||
## `src/game/engine/docs/PLAYER_API.md`.
|
## `src/game/engine/docs/PLAYER_API.md`.
|
||||||
##
|
##
|
||||||
## Originally introduced in p2-67 as `claude_player_main.gd`. Renamed
|
## Client-agnostic: Claude Code drives it via `tooling/claude-player-mcp/`,
|
||||||
## 2026-05-17 to drop the Claude-flavored naming — the wire protocol is
|
## an RL trainer drives it via `subprocess.Popen`, shell smoke tests
|
||||||
## client-agnostic; Claude Code is one consumer via
|
## write raw JSON-Lines to its stdin.
|
||||||
## `tooling/claude-player-mcp/`, an OpenSpiel/RL trainer can plug in via
|
|
||||||
## `subprocess.Popen`, and shell smoke tests use raw JSON-Lines.
|
|
||||||
##
|
##
|
||||||
## Env vars consumed:
|
## Env vars consumed:
|
||||||
## - `CP_SEED` — RNG seed (default 42)
|
## - `CP_SEED` — RNG seed (default 42)
|
||||||
## - `CP_PLAYERS` — total player slots (default 2)
|
## - `CP_PLAYERS` — total player slots (default 2)
|
||||||
## - `CP_CLAUDE_SLOT` — which slot stdin controls (default 0)
|
## - `CP_PLAYER_SLOT` — which slot stdin controls (default 0)
|
||||||
## - `CP_MAP_SIZE` — MapGenerator size key (default "duel")
|
## - `CP_MAP_SIZE` — MapGenerator size key (default "duel")
|
||||||
## - `CP_MAP_TYPE` — MapGenerator map type (default "continents")
|
## - `CP_MAP_TYPE` — MapGenerator map type (default "continents")
|
||||||
## - `CP_OMNISCIENT` — `1` disables fog redaction (default 0)
|
## - `CP_OMNISCIENT` — `1` disables fog redaction (default 0)
|
||||||
|
|
@ -24,14 +22,14 @@ extends Node
|
||||||
## - `CP_LOG_FILE` — if set, mirror all wire I/O to this path
|
## - `CP_LOG_FILE` — if set, mirror all wire I/O to this path
|
||||||
|
|
||||||
var _api: RefCounted = null
|
var _api: RefCounted = null
|
||||||
var _claude_slot: int = 0
|
var _player_slot: int = 0
|
||||||
var _omniscient: bool = false
|
var _omniscient: bool = false
|
||||||
var _log_path: String = ""
|
var _log_path: String = ""
|
||||||
var _shutdown: bool = false
|
var _shutdown: bool = false
|
||||||
|
|
||||||
|
|
||||||
func _ready() -> void:
|
func _ready() -> void:
|
||||||
_claude_slot = _env_int("CP_CLAUDE_SLOT", 0)
|
_player_slot = _env_int("CP_PLAYER_SLOT", 0)
|
||||||
_omniscient = _env_bool("CP_OMNISCIENT", false)
|
_omniscient = _env_bool("CP_OMNISCIENT", false)
|
||||||
_log_path = OS.get_environment("CP_LOG_FILE")
|
_log_path = OS.get_environment("CP_LOG_FILE")
|
||||||
|
|
||||||
|
|
@ -63,7 +61,7 @@ func _ready() -> void:
|
||||||
# no `id` field; adapters can use them to drive streaming UIs or
|
# no `id` field; adapters can use them to drive streaming UIs or
|
||||||
# ignore them entirely (the synchronous response after the next
|
# ignore them entirely (the synchronous response after the next
|
||||||
# `act` carries the same data via `events`).
|
# `act` carries the same data via `events`).
|
||||||
_emit_event("turn_started", {"turn": 0, "player": _claude_slot})
|
_emit_event("turn_started", {"turn": 0, "player": _player_slot})
|
||||||
_emit_event("phase_changed", {"phase": "player_actions"})
|
_emit_event("phase_changed", {"phase": "player_actions"})
|
||||||
|
|
||||||
# Enter the pump on the next frame so any pending engine init flushes.
|
# Enter the pump on the next frame so any pending engine init flushes.
|
||||||
|
|
@ -337,7 +335,7 @@ func _apply_ai_personalities(gs: RefCounted, num_players: int) -> void:
|
||||||
# assign clan_ids[ai_index % clan_count]. Stable across runs.
|
# assign clan_ids[ai_index % clan_count]. Stable across runs.
|
||||||
var ai_index: int = 0
|
var ai_index: int = 0
|
||||||
for slot: int in range(num_players):
|
for slot: int in range(num_players):
|
||||||
if slot == _claude_slot:
|
if slot == _player_slot:
|
||||||
continue
|
continue
|
||||||
var clan_id: String = clan_ids[ai_index % clan_ids.size()]
|
var clan_id: String = clan_ids[ai_index % clan_ids.size()]
|
||||||
ai_index += 1
|
ai_index += 1
|
||||||
|
|
@ -420,7 +418,7 @@ func _handle_request(req: Dictionary) -> void:
|
||||||
var has_id: bool = req.has("id") and req.get("id") != null
|
var has_id: bool = req.has("id") and req.get("id") != null
|
||||||
match rtype:
|
match rtype:
|
||||||
"view":
|
"view":
|
||||||
var view_json: String = String(_api.view_json(_claude_slot))
|
var view_json: String = String(_api.view_json(_player_slot))
|
||||||
_emit_response_with_view(rid_int, has_id, view_json)
|
_emit_response_with_view(rid_int, has_id, view_json)
|
||||||
"act":
|
"act":
|
||||||
var action_payload: Dictionary = req.get("action", {}) as Dictionary
|
var action_payload: Dictionary = req.get("action", {}) as Dictionary
|
||||||
|
|
@ -429,7 +427,7 @@ func _handle_request(req: Dictionary) -> void:
|
||||||
return
|
return
|
||||||
var action_json: String = JSON.stringify(action_payload)
|
var action_json: String = JSON.stringify(action_payload)
|
||||||
var envelope_str: String = String(
|
var envelope_str: String = String(
|
||||||
_api.apply_action_json(_claude_slot, action_json)
|
_api.apply_action_json(_player_slot, action_json)
|
||||||
)
|
)
|
||||||
# api wrapper already emits a full ok/err envelope — splice
|
# api wrapper already emits a full ok/err envelope — splice
|
||||||
# in the request id and forward as the response body.
|
# in the request id and forward as the response body.
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,7 @@ extends Node2D
|
||||||
## it is a Phase-13 deliverable workaround that bypasses the full
|
## it is a Phase-13 deliverable workaround that bypasses the full
|
||||||
## canonical render source extraction (`p2-72a`, deferred).
|
## canonical render source extraction (`p2-72a`, deferred).
|
||||||
##
|
##
|
||||||
## Mirrors `claude_player_main.gd` for the boot sequence (DataLoader,
|
## Mirrors `player_api_main.gd` for the boot sequence (DataLoader,
|
||||||
## GdMapGenerator, capital placement, AI personalities, runtime + tactical
|
## GdMapGenerator, capital placement, AI personalities, runtime + tactical
|
||||||
## catalogs) and `full_game_demo_proof.gd` for the renderer scaffold
|
## catalogs) and `full_game_demo_proof.gd` for the renderer scaffold
|
||||||
## (HexRenderer / UnitRenderer / CityRenderer wired directly under a
|
## (HexRenderer / UnitRenderer / CityRenderer wired directly under a
|
||||||
|
|
@ -42,7 +42,7 @@ extends Node2D
|
||||||
## Env vars:
|
## Env vars:
|
||||||
## - CP_SEED (default 42)
|
## - CP_SEED (default 42)
|
||||||
## - CP_PLAYERS (default 3)
|
## - CP_PLAYERS (default 3)
|
||||||
## - CP_CLAUDE_SLOT (default 0)
|
## - CP_PLAYER_SLOT (default 0)
|
||||||
## - CP_MAP_SIZE (default "duel")
|
## - CP_MAP_SIZE (default "duel")
|
||||||
## - CP_TURNS (default 25)
|
## - CP_TURNS (default 25)
|
||||||
## - CP_SCREENSHOT_EVERY (default 1) — capture every Nth turn
|
## - CP_SCREENSHOT_EVERY (default 1) — capture every Nth turn
|
||||||
|
|
@ -79,7 +79,7 @@ var _unit_renderer: Node2D = null
|
||||||
var _city_renderer: Node2D = null
|
var _city_renderer: Node2D = null
|
||||||
var _camera: Camera2D = null
|
var _camera: Camera2D = null
|
||||||
|
|
||||||
var _claude_slot: int = 0
|
var _player_slot: int = 0
|
||||||
var _num_players: int = 3
|
var _num_players: int = 3
|
||||||
var _seed: int = 42
|
var _seed: int = 42
|
||||||
var _map_size: String = "duel"
|
var _map_size: String = "duel"
|
||||||
|
|
@ -118,7 +118,7 @@ func _ready() -> void:
|
||||||
|
|
||||||
_seed = _env_int("CP_SEED", 42)
|
_seed = _env_int("CP_SEED", 42)
|
||||||
_num_players = _env_int("CP_PLAYERS", 3)
|
_num_players = _env_int("CP_PLAYERS", 3)
|
||||||
_claude_slot = _env_int("CP_CLAUDE_SLOT", 0)
|
_player_slot = _env_int("CP_PLAYER_SLOT", 0)
|
||||||
_map_size = _env_or("CP_MAP_SIZE", "duel")
|
_map_size = _env_or("CP_MAP_SIZE", "duel")
|
||||||
_max_turns = _env_int("CP_TURNS", 25)
|
_max_turns = _env_int("CP_TURNS", 25)
|
||||||
_screenshot_every = max(1, _env_int("CP_SCREENSHOT_EVERY", 1))
|
_screenshot_every = max(1, _env_int("CP_SCREENSHOT_EVERY", 1))
|
||||||
|
|
@ -199,7 +199,7 @@ func _bootstrap_world() -> void:
|
||||||
get_tree().quit(1)
|
get_tree().quit(1)
|
||||||
return
|
return
|
||||||
|
|
||||||
# Scan land tiles + pick spaced capitals, mirroring claude_player_main.gd.
|
# Scan land tiles + pick spaced capitals, mirroring player_api_main.gd.
|
||||||
var grid_w: int = int(grid.get_width())
|
var grid_w: int = int(grid.get_width())
|
||||||
var grid_h: int = int(grid.get_height())
|
var grid_h: int = int(grid.get_height())
|
||||||
var land_tiles: Array[Vector2i] = _scan_land_tiles(grid, grid_w, grid_h)
|
var land_tiles: Array[Vector2i] = _scan_land_tiles(grid, grid_w, grid_h)
|
||||||
|
|
@ -343,24 +343,24 @@ func _find_claude_capital_axial() -> Vector2i:
|
||||||
if _api == null:
|
if _api == null:
|
||||||
push_warning("capital anchor: _api null, falling back to (0,0)")
|
push_warning("capital anchor: _api null, falling back to (0,0)")
|
||||||
return Vector2i.ZERO
|
return Vector2i.ZERO
|
||||||
var view_json: String = String(_api.view_json(_claude_slot))
|
var view_json: String = String(_api.view_json(_player_slot))
|
||||||
var view: Dictionary = JSON.parse_string(view_json) as Dictionary
|
var view: Dictionary = JSON.parse_string(view_json) as Dictionary
|
||||||
if view == null:
|
if view == null:
|
||||||
push_warning("capital anchor: view_json non-Dictionary, falling back to (0,0)")
|
push_warning("capital anchor: view_json non-Dictionary, falling back to (0,0)")
|
||||||
return Vector2i.ZERO
|
return Vector2i.ZERO
|
||||||
# 1) First city owned by Claude.
|
# 1) First city owned by Claude.
|
||||||
for city_dict: Dictionary in (view.get("cities", []) as Array):
|
for city_dict: Dictionary in (view.get("cities", []) as Array):
|
||||||
if int(city_dict.get("owner", -1)) == _claude_slot:
|
if int(city_dict.get("owner", -1)) == _player_slot:
|
||||||
return _wire_hex_to_vec(city_dict.get("position", []))
|
return _wire_hex_to_vec(city_dict.get("position", []))
|
||||||
# 2) Claude's founder unit.
|
# 2) Claude's founder unit.
|
||||||
for unit_dict: Dictionary in (view.get("units", []) as Array):
|
for unit_dict: Dictionary in (view.get("units", []) as Array):
|
||||||
if int(unit_dict.get("owner", -1)) != _claude_slot:
|
if int(unit_dict.get("owner", -1)) != _player_slot:
|
||||||
continue
|
continue
|
||||||
if String(unit_dict.get("type", "")) == "dwarf_founder":
|
if String(unit_dict.get("type", "")) == "dwarf_founder":
|
||||||
return _wire_hex_to_vec(unit_dict.get("position", []))
|
return _wire_hex_to_vec(unit_dict.get("position", []))
|
||||||
# 3) Any Claude unit (defensive).
|
# 3) Any Claude unit (defensive).
|
||||||
for unit_dict: Dictionary in (view.get("units", []) as Array):
|
for unit_dict: Dictionary in (view.get("units", []) as Array):
|
||||||
if int(unit_dict.get("owner", -1)) == _claude_slot:
|
if int(unit_dict.get("owner", -1)) == _player_slot:
|
||||||
return _wire_hex_to_vec(unit_dict.get("position", []))
|
return _wire_hex_to_vec(unit_dict.get("position", []))
|
||||||
push_warning("capital anchor: no Claude city/unit in view, falling back to (0,0)")
|
push_warning("capital anchor: no Claude city/unit in view, falling back to (0,0)")
|
||||||
return Vector2i.ZERO
|
return Vector2i.ZERO
|
||||||
|
|
@ -526,7 +526,7 @@ func _request_view() -> Dictionary:
|
||||||
var req_id: int = _wire_request_id
|
var req_id: int = _wire_request_id
|
||||||
var req: Dictionary = {"type": "view", "id": req_id}
|
var req: Dictionary = {"type": "view", "id": req_id}
|
||||||
_wire_append(req)
|
_wire_append(req)
|
||||||
var view_json: String = String(_api.view_json(_claude_slot))
|
var view_json: String = String(_api.view_json(_player_slot))
|
||||||
var view: Dictionary = JSON.parse_string(view_json) as Dictionary
|
var view: Dictionary = JSON.parse_string(view_json) as Dictionary
|
||||||
if view == null:
|
if view == null:
|
||||||
push_error("view_json returned non-Dictionary payload")
|
push_error("view_json returned non-Dictionary payload")
|
||||||
|
|
@ -542,7 +542,7 @@ func _apply_action(action: Dictionary) -> Dictionary:
|
||||||
var req: Dictionary = {"type": "act", "id": req_id, "action": action}
|
var req: Dictionary = {"type": "act", "id": req_id, "action": action}
|
||||||
_wire_append(req)
|
_wire_append(req)
|
||||||
var action_json: String = JSON.stringify(action)
|
var action_json: String = JSON.stringify(action)
|
||||||
var envelope_str: String = String(_api.apply_action_json(_claude_slot, action_json))
|
var envelope_str: String = String(_api.apply_action_json(_player_slot, action_json))
|
||||||
var envelope: Dictionary = JSON.parse_string(envelope_str) as Dictionary
|
var envelope: Dictionary = JSON.parse_string(envelope_str) as Dictionary
|
||||||
if envelope == null:
|
if envelope == null:
|
||||||
envelope = {"ok": false, "error": {"code": "internal", "message": "non-JSON envelope"}}
|
envelope = {"ok": false, "error": {"code": "internal", "message": "non-JSON envelope"}}
|
||||||
|
|
@ -587,7 +587,7 @@ func _rehydrate_view(view: Dictionary) -> void:
|
||||||
var p: PlayerScript = PlayerScript.new()
|
var p: PlayerScript = PlayerScript.new()
|
||||||
var idx: int = GameState.players.size()
|
var idx: int = GameState.players.size()
|
||||||
p.index = idx
|
p.index = idx
|
||||||
p.is_human = (idx == _claude_slot)
|
p.is_human = (idx == _player_slot)
|
||||||
p.player_name = "Slot %d" % idx
|
p.player_name = "Slot %d" % idx
|
||||||
p.race_id = "dwarf"
|
p.race_id = "dwarf"
|
||||||
p.color = SLOT_COLORS[idx % SLOT_COLORS.size()]
|
p.color = SLOT_COLORS[idx % SLOT_COLORS.size()]
|
||||||
|
|
@ -679,7 +679,7 @@ func _capture_screenshot(turn_idx: int) -> void:
|
||||||
if scores.size() > 0:
|
if scores.size() > 0:
|
||||||
var s0: Dictionary = scores[0] as Dictionary
|
var s0: Dictionary = scores[0] as Dictionary
|
||||||
scoreboard = "Turn %d • Claude (slot %d) gold %d cities %d units %d" % [
|
scoreboard = "Turn %d • Claude (slot %d) gold %d cities %d units %d" % [
|
||||||
turn_idx, _claude_slot,
|
turn_idx, _player_slot,
|
||||||
int(s0.get("gold", 0)),
|
int(s0.get("gold", 0)),
|
||||||
int(s0.get("cities", 0)),
|
int(s0.get("cities", 0)),
|
||||||
int(s0.get("units", 0)),
|
int(s0.get("units", 0)),
|
||||||
|
|
@ -722,7 +722,7 @@ func _snapshot_scores(view: Dictionary) -> Array:
|
||||||
## AI activity is captured via end_turn events instead.
|
## AI activity is captured via end_turn events instead.
|
||||||
var s: Dictionary = view.get("score", {}) as Dictionary
|
var s: Dictionary = view.get("score", {}) as Dictionary
|
||||||
return [{
|
return [{
|
||||||
"slot": _claude_slot,
|
"slot": _player_slot,
|
||||||
"gold": int(s.get("gold_total", 0)),
|
"gold": int(s.get("gold_total", 0)),
|
||||||
"cities": int(s.get("city_count", 0)),
|
"cities": int(s.get("city_count", 0)),
|
||||||
"units": int(s.get("unit_count", 0)),
|
"units": int(s.get("unit_count", 0)),
|
||||||
|
|
@ -744,7 +744,7 @@ func _write_recap() -> void:
|
||||||
lines.append("# p2-72 Option B — Claude-vs-AI Render Proof")
|
lines.append("# p2-72 Option B — Claude-vs-AI Render Proof")
|
||||||
lines.append("")
|
lines.append("")
|
||||||
lines.append("- Seed: %d" % _seed)
|
lines.append("- Seed: %d" % _seed)
|
||||||
lines.append("- Players: %d (Claude slot: %d)" % [_num_players, _claude_slot])
|
lines.append("- Players: %d (Claude slot: %d)" % [_num_players, _player_slot])
|
||||||
lines.append("- Map size: %s" % _map_size)
|
lines.append("- Map size: %s" % _map_size)
|
||||||
lines.append("- Turns driven: %d" % (_turn_records.size() - 1))
|
lines.append("- Turns driven: %d" % (_turn_records.size() - 1))
|
||||||
lines.append("- Screenshots captured: %d" % _captured_turns.size())
|
lines.append("- Screenshots captured: %d" % _captured_turns.size())
|
||||||
|
|
@ -782,7 +782,7 @@ func _write_recap() -> void:
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
|
|
||||||
# ── Boot helpers ported from claude_player_main.gd ────────────────────────
|
# ── Boot helpers ported from player_api_main.gd ────────────────────────
|
||||||
|
|
||||||
func _scan_land_tiles(grid: RefCounted, w: int, h: int) -> Array[Vector2i]:
|
func _scan_land_tiles(grid: RefCounted, w: int, h: int) -> Array[Vector2i]:
|
||||||
const FORBIDDEN: Array[String] = [
|
const FORBIDDEN: Array[String] = [
|
||||||
|
|
@ -898,7 +898,7 @@ func _apply_ai_personalities(gs: RefCounted, num_players: int) -> void:
|
||||||
return
|
return
|
||||||
var ai_index: int = 0
|
var ai_index: int = 0
|
||||||
for slot: int in range(num_players):
|
for slot: int in range(num_players):
|
||||||
if slot == _claude_slot:
|
if slot == _player_slot:
|
||||||
continue
|
continue
|
||||||
var clan_id: String = clan_ids[ai_index % clan_ids.size()]
|
var clan_id: String = clan_ids[ai_index % clan_ids.size()]
|
||||||
ai_index += 1
|
ai_index += 1
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,7 @@ function harnessEnv(): Record<string, string> {
|
||||||
const passthrough = [
|
const passthrough = [
|
||||||
"CP_SEED",
|
"CP_SEED",
|
||||||
"CP_PLAYERS",
|
"CP_PLAYERS",
|
||||||
"CP_CLAUDE_SLOT",
|
"CP_PLAYER_SLOT",
|
||||||
"CP_MAP_SIZE",
|
"CP_MAP_SIZE",
|
||||||
"CP_MAP_TYPE",
|
"CP_MAP_TYPE",
|
||||||
"CP_OMNISCIENT",
|
"CP_OMNISCIENT",
|
||||||
|
|
|
||||||
236
tooling/rl-self-play/encoders.py
Normal file
236
tooling/rl-self-play/encoders.py
Normal file
|
|
@ -0,0 +1,236 @@
|
||||||
|
"""PlayerView ⇄ fixed-shape tensors for RL.
|
||||||
|
|
||||||
|
The wire-side `PlayerView` is a deeply-nested JSON dict; RL libraries
|
||||||
|
need fixed-shape numeric arrays. We pin two contracts here:
|
||||||
|
|
||||||
|
1. **Observation encoder** (`encode_observation`) projects the view into
|
||||||
|
a fixed-length float32 vector. Length is `OBS_DIM`; layout is
|
||||||
|
deterministic and documented inline so the policy net can learn a
|
||||||
|
stable embedding.
|
||||||
|
|
||||||
|
2. **Action index encoder** (`encode_legal_actions` /
|
||||||
|
`decode_action_index`) flattens the view's `legal_actions` (top-level
|
||||||
|
+ per-unit + per-city) into a fixed-size index space `[0, ACTION_DIM)`.
|
||||||
|
Indices not occupied by a legal action in the current state are
|
||||||
|
masked out by `legal_action_mask`. MaskablePPO consumes that mask
|
||||||
|
directly.
|
||||||
|
|
||||||
|
These encoders are intentionally lossy — they discard tile-by-tile data
|
||||||
|
and only summarise the macro state. Replace with a CNN-based observation
|
||||||
|
once the macro head proves the loop works end-to-end.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
# ── Observation shape ────────────────────────────────────────────────
|
||||||
|
# The fixed-length observation vector has four blocks of 8 floats each:
|
||||||
|
# [0:8] self resources + score (gold, gold_per_turn, sci_per_turn,
|
||||||
|
# score_estimate, city_count, unit_count,
|
||||||
|
# happiness_pool, culture_per_turn)
|
||||||
|
# [8:16] self city/unit summary: [8] food and [9] production summed
|
||||||
|
# across cities, [10] average city population,
|
||||||
|
# [11] units whose type contains "warrior",
|
||||||
|
# [12] units whose type contains "founder"; [13:16] stay zero for now
|
||||||
|
# [16:24] opponent intel snapshot (opponent count seen, # at war,
|
||||||
|
# # at peace, # open_borders, ...)
|
||||||
|
# padded to 8 floats
|
||||||
|
# [24:32] turn counters (turn number, fraction of game elapsed,
|
||||||
|
# # cities lost, # cities captured,
|
||||||
|
# ... pad to 8)
|
||||||
|
OBS_DIM = 32
|
||||||
|
|
||||||
|
# ── Action index layout ──────────────────────────────────────────────
|
||||||
|
# We bucket legal actions deterministically:
|
||||||
|
# [0] end_turn
|
||||||
|
# [1] noop
|
||||||
|
# [2..2+MAX_UNITS*K) per-unit slots (skip, fortify, sentry, found_city,
|
||||||
|
# move-N/NE/SE/S/SW/NW (6 dirs),
|
||||||
|
# attack-target N/NE/SE/S/SW/NW (6 dirs))
|
||||||
|
# tail per-city build queue: indices into a fixed
|
||||||
|
# priority-ordered roster (worker, warrior, library,
|
||||||
|
# barracks, forge, walls, longhouse, monument)
|
||||||
|
#
|
||||||
|
# Anything legal but outside this layout is silently dropped — the RL
|
||||||
|
# agent simply can't learn to take it. For a duel game, the layout
|
||||||
|
# below covers >95% of legitimate openings; for the full 5-player
|
||||||
|
# huge-map case we extend MAX_UNITS / CITY_QUEUE_SLOTS once the basic
|
||||||
|
# loop trains.
|
||||||
|
MAX_UNITS = 16
|
||||||
|
# NOTE(review): the list below names 17 sub-actions (4 + 6 moves + 6 attacks
# + unfortify) for 16 slots — confirm the highest sub-index stays below
# PER_UNIT_ACTIONS so it cannot alias the next unit's slot 0.
PER_UNIT_ACTIONS = 16 # skip, fortify, sentry, found, move×6, attack×6, unfortify
|
||||||
|
MAX_CITIES = 4
|
||||||
|
CITY_QUEUE_ITEMS: tuple[str, ...] = (
|
||||||
|
"worker", "warrior", "library", "barracks", "forge",
|
||||||
|
"walls", "longhouse", "monument", "dwarf_warrior", "dwarf_founder",
|
||||||
|
"spearmen", "archer", "temple", "high_guild_hall", "chronicle_tower",
|
||||||
|
"mead_hall",
|
||||||
|
)
|
||||||
|
CITY_QUEUE_DIM = len(CITY_QUEUE_ITEMS)
|
||||||
|
|
||||||
|
ACTION_DIM = (
|
||||||
|
2 # end_turn, noop
|
||||||
|
+ MAX_UNITS * PER_UNIT_ACTIONS
|
||||||
|
+ MAX_CITIES * CITY_QUEUE_DIM
|
||||||
|
)
|
||||||
|
|
||||||
|
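# Hex direction order (matches `legal_actions` move targets after
|
||||||
|
# canonicalising relative-direction). Offset coords, one table per parity.
|
||||||
|
# Order is N, NE, SE, S, SW, NW. NOTE(review): the original note said
# "pointy-top, even-r layout used by mc-core", but N/S neighbours and the
# offset pairs below are the standard flat-top "odd-q" (column-parity)
# tables, while `_hex_direction` selects the table by row parity — confirm
# the layout mc-core actually uses.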
|
||||||
|
_DIR_OFFSETS_EVEN: tuple[tuple[int, int], ...] = (
|
||||||
|
(0, -1), (1, -1), (1, 0), (0, 1), (-1, 0), (-1, -1),
|
||||||
|
)
|
||||||
|
_DIR_OFFSETS_ODD: tuple[tuple[int, int], ...] = (
|
||||||
|
(0, -1), (1, 0), (1, 1), (0, 1), (-1, 1), (-1, 0),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _hex_direction(from_pos: tuple[int, int], to_pos: tuple[int, int]) -> int | None:
    """Map a single step between two offset-coordinate hexes to a direction index.

    Args:
        from_pos: ``(col, row)`` of the origin hex.
        to_pos: ``(col, row)`` of the candidate neighbour.

    Returns:
        The position (0..5) of the ``(dcol, drow)`` delta inside the offset
        table selected by the parity of the origin row, or ``None`` when the
        delta is not in that table (i.e. the target is not adjacent).

    NOTE(review): the table entries look like column-parity ("odd-q")
    neighbour offsets, yet the table is picked by row parity here — confirm
    against the engine's hex layout before relying on the direction labels.
    """
    src_col, src_row = from_pos
    dst_col, dst_row = to_pos
    delta = (dst_col - src_col, dst_row - src_row)
    # Odd origin rows use the odd table; everything else uses the even one.
    offsets = _DIR_OFFSETS_ODD if src_row % 2 else _DIR_OFFSETS_EVEN
    try:
        return offsets.index(delta)
    except ValueError:
        # Delta is not one of the six neighbour offsets.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def encode_observation(view: dict[str, Any]) -> np.ndarray:
    """Project a PlayerView dict into a fixed-shape float32 vector.

    Args:
        view: Decoded PlayerView JSON. Missing keys fall back to zero/empty.

    Returns:
        A ``float32`` array of length ``OBS_DIM``. Indices written here
        (every other index stays 0.0):

        * ``[0]`` gold, ``[1]`` gold_per_turn, ``[2]`` science_per_turn,
          ``[6]`` happiness_pool, ``[7]`` culture_per_turn (from ``resources``)
        * ``[3]`` score_estimate, ``[4]`` city_count, ``[5]`` unit_count
          (from ``score``)
        * ``[8]`` food and ``[9]`` production summed over own cities,
          ``[10]`` mean own-city population
        * ``[11]`` own units whose type contains ``"warrior"``,
          ``[12]`` own units whose type contains ``"founder"``
        * ``[16]`` diplomacy entries, ``[17]`` at war, ``[18]`` at peace,
          ``[19]`` with open borders
        * ``[24]`` turn number, ``[25]`` ``turn / 500`` clamped to 1.0
    """
    obs = np.zeros(OBS_DIM, dtype=np.float32)
    res = view.get("resources", {})
    score = view.get("score", {})
    me = int(view.get("player", 0))

    # [0:8] own resources + score.
    obs[0] = float(res.get("gold", 0.0))
    obs[1] = float(res.get("gold_per_turn", 0.0))
    obs[2] = float(res.get("science_per_turn", 0.0))
    obs[3] = float(score.get("score_estimate", 0.0))
    obs[4] = float(score.get("city_count", 0.0))
    obs[5] = float(score.get("unit_count", 0.0))
    obs[6] = float(res.get("happiness_pool", 0.0))
    obs[7] = float(res.get("culture_per_turn", 0.0))

    # [8:11] own-city aggregates. Units are already restricted to `me`, and
    # the harness side filters view cities by owner as well, so presumably
    # the list can contain visible foreign cities; keep them out of the
    # "self" block. A city without an "owner" key is still counted as ours,
    # which preserves the previous behaviour for owner-less payloads.
    my_cities = [
        c for c in view.get("cities", []) if int(c.get("owner", me)) == me
    ]
    if my_cities:
        obs[8] = sum(float(c.get("yields", {}).get("food", 0)) for c in my_cities)
        obs[9] = sum(
            float(c.get("yields", {}).get("production", 0)) for c in my_cities
        )
        obs[10] = sum(float(c.get("population", 0)) for c in my_cities) / len(
            my_cities
        )

    # [11:13] own-unit counts, classified by substring of the unit type id.
    my_units = [u for u in view.get("units", []) if int(u.get("owner", -1)) == me]
    obs[11] = float(sum(1 for u in my_units if "warrior" in str(u.get("type", ""))))
    obs[12] = float(sum(1 for u in my_units if "founder" in str(u.get("type", ""))))

    # [16:20] opponent / diplomacy snapshot.
    diplo = view.get("diplomacy", [])
    obs[16] = float(len(diplo))
    obs[17] = float(sum(1 for d in diplo if d.get("relation") == "war"))
    obs[18] = float(sum(1 for d in diplo if d.get("relation") == "peace"))
    obs[19] = float(sum(1 for d in diplo if d.get("open_borders")))

    # [24:26] turn counters.
    turn = float(view.get("turn", 0))
    obs[24] = turn
    # Bound turn at 500 (huge-map limit) for a rough [0,1] progress signal.
    obs[25] = min(1.0, turn / 500.0)
    return obs
|
||||||
|
|
||||||
|
|
||||||
|
def _unit_action_offset(unit_slot: int, sub: int) -> int:
    """Return the flat action index of sub-action `sub` for unit slot `unit_slot`.

    Per-unit actions start at index 2 and each unit slot spans
    PER_UNIT_ACTIONS consecutive indices.
    """
    slot_base = 2 + unit_slot * PER_UNIT_ACTIONS
    return slot_base + sub
|
||||||
|
|
||||||
|
|
||||||
|
def _city_action_offset(city_slot: int, item_idx: int) -> int:
    """Return the flat action index for queueing item `item_idx` in city slot `city_slot`.

    City actions begin right after the MAX_UNITS * PER_UNIT_ACTIONS per-unit
    indices (which themselves start at 2); each city slot spans
    CITY_QUEUE_DIM consecutive indices.
    """
    city_region_start = 2 + MAX_UNITS * PER_UNIT_ACTIONS
    return city_region_start + city_slot * CITY_QUEUE_DIM + item_idx
|
||||||
|
|
||||||
|
|
||||||
|
def encode_legal_actions(
    view: dict[str, Any],
) -> tuple[np.ndarray, dict[int, dict[str, Any]]]:
    """Build the action mask plus an index -> PlayerAction lookup table.

    Returns ``(mask, idx_to_action)`` where ``mask`` is a bool array of
    length ACTION_DIM and ``idx_to_action`` maps every enabled index to the
    action dict taken from the view. An index is True in the mask exactly
    when it is a key of the dict. MaskablePPO uses the mask to zero out the
    sampling distribution before drawing.

    Flat index layout:

    * 0 / 1 -- global ``end_turn`` / ``noop``
    * per own unit (first MAX_UNITS only), via `_unit_action_offset`:
      0 skip, 1 fortify, 2 sentry, 3 found_city, 4 unfortify,
      5..10 move by hex direction, 11.. attack by hex direction
    * per city (first MAX_CITIES only), via `_city_action_offset`:
      one index per entry of CITY_QUEUE_ITEMS

    Actions that do not fit this layout (unknown type, a move/attack whose
    direction `_hex_direction` cannot resolve, an item missing from
    CITY_QUEUE_ITEMS, an offset >= ACTION_DIM) are silently left out.
    """
    legal = np.zeros(ACTION_DIM, dtype=bool)
    lookup: dict[int, dict[str, Any]] = {}

    def enable(index: int, action: dict[str, Any]) -> None:
        # Mark one flat index legal and remember which action it stands for.
        legal[index] = True
        lookup[index] = action

    # --- global actions -------------------------------------------------
    global_slots = (("end_turn", 0), ("noop", 1))
    for item in view.get("legal_actions", []):
        action = item.get("action", {})
        kind = action.get("type")
        for name, index in global_slots:
            if kind == name:
                enable(index, action)
                break

    # --- per-unit actions -----------------------------------------------
    # Sub-action numbers that do not depend on a direction.
    fixed_subs = {
        "skip": 0,
        "fortify": 1,
        "sentry": 2,
        "found_city": 3,
        "unfortify": 4,
    }
    all_units = view.get("units", [])
    player_id = int(view.get("player", 0))
    own_units = [
        unit for unit in all_units if int(unit.get("owner", -1)) == player_id
    ]
    for unit_slot, unit in enumerate(own_units[:MAX_UNITS]):
        origin = tuple(int(v) for v in unit.get("position", (0, 0)))
        for item in unit.get("legal_actions", []):
            action = item.get("action", {})
            kind = action.get("type")
            sub: int | None
            if isinstance(kind, str) and kind in fixed_subs:
                sub = fixed_subs[kind]
            elif kind == "move":
                heading = _hex_direction(
                    origin, tuple(int(v) for v in action.get("to", (0, 0)))
                )
                sub = None if heading is None else 5 + heading  # 5..10
            elif kind == "attack":
                heading = _hex_direction(
                    origin, tuple(int(v) for v in action.get("target", (0, 0)))
                )
                # NOTE(review): 11 + heading can exceed the last per-unit
                # sub-index; the clamp keeps it inside this unit's range. If
                # PER_UNIT_ACTIONS is 16 (as the previous comment here stated)
                # and headings run 0..5, headings 4 and 5 both land on sub 15,
                # so the later entry overwrites the earlier one in the lookup.
                # Widening PER_UNIT_ACTIONS would remove the aliasing -- confirm.
                sub = (
                    None
                    if heading is None
                    else min(11 + heading, PER_UNIT_ACTIONS - 1)
                )
            else:
                sub = None

            if sub is None:
                continue
            offset = _unit_action_offset(unit_slot, sub)
            if offset < ACTION_DIM:
                enable(offset, action)

    # --- per-city production queue --------------------------------------
    for city_slot, city in enumerate(view.get("cities", [])[:MAX_CITIES]):
        for item in city.get("legal_actions", []):
            action = item.get("action", {})
            if action.get("type") == "queue_production":
                item_name = str(action.get("item", ""))
                if item_name in CITY_QUEUE_ITEMS:
                    offset = _city_action_offset(
                        city_slot, CITY_QUEUE_ITEMS.index(item_name)
                    )
                    if offset < ACTION_DIM:
                        enable(offset, action)

    return legal, lookup
|
||||||
|
|
||||||
|
|
||||||
|
def decode_action_index(
    index: int, idx_to_action: dict[int, dict[str, Any]]
) -> dict[str, Any]:
    """Map a policy-chosen flat index back to its PlayerAction dict.

    This is the inverse of `encode_legal_actions`. An index missing from
    `idx_to_action` (i.e. a masked one -- not expected with MaskablePPO,
    but cheap to guard against) decodes to a plain `end_turn` action.
    """
    if index in idx_to_action:
        return idx_to_action[index]
    return {"type": "end_turn"}
|
||||||
151
tooling/rl-self-play/harness_client.py
Normal file
151
tooling/rl-self-play/harness_client.py
Normal file
|
|
@ -0,0 +1,151 @@
|
||||||
|
"""Reusable JSON-Lines client for `scripts/player-api-server.sh`.
|
||||||
|
|
||||||
|
One client = one subprocess = one game. The Gymnasium env in
|
||||||
|
`magic_civ_env.py` owns one of these per `reset()` cycle, and the
|
||||||
|
evaluator owns one per evaluation episode. Both share the same
|
||||||
|
protocol; pulling it out here lets them stay in sync with the wire
|
||||||
|
contract documented in `src/game/engine/docs/PLAYER_API.md`.
|
||||||
|
|
||||||
|
Strong types throughout — no string-typed errors leaking up from the
|
||||||
|
harness. Anything off-protocol raises `HarnessError`.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
# Repository root: two directory levels above the folder that holds this file.
REPO_ROOT = Path(__file__).resolve().parents[2]
# Shell entry point that launches the player-API harness.
HARNESS_SCRIPT = REPO_ROOT.joinpath("scripts", "player-api-server.sh")

# Upper bound on how many stdout lines are read while looking for the
# response that matches a request. Each harness response can sit behind
# 0..N async notifications, so some slack is required -- 5000 is generous
# enough that even a busy turn with hundreds of `unit_moved` /
# `turn_started` notifications won't trip it. The bound only stops an
# endless stream of uncorrelated lines; a harness that goes completely
# silent still blocks the caller inside `readline()`.
MAX_LINES_PER_RESPONSE = 5000
|
||||||
|
|
||||||
|
|
||||||
|
class HarnessError(RuntimeError):
    """Signals a harness protocol violation or an unexpected harness exit."""
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True, slots=True)
|
||||||
|
class HarnessConfig:
|
||||||
|
"""Per-episode harness configuration. Mirrors the env-var contract in
|
||||||
|
`scripts/player-api-server.sh` so callers can override any axis without
|
||||||
|
knowing the env-var spelling."""
|
||||||
|
|
||||||
|
seed: int = 42
|
||||||
|
players: int = 2
|
||||||
|
player_slot: int = 0
|
||||||
|
map_size: str = "duel"
|
||||||
|
map_type: str = "continents"
|
||||||
|
omniscient: bool = False
|
||||||
|
timeout_sec: int = 60
|
||||||
|
|
||||||
|
def to_env(self) -> dict[str, str]:
|
||||||
|
return {
|
||||||
|
"CP_SEED": str(self.seed),
|
||||||
|
"CP_PLAYERS": str(self.players),
|
||||||
|
"CP_PLAYER_SLOT": str(self.player_slot),
|
||||||
|
"CP_MAP_SIZE": self.map_size,
|
||||||
|
"CP_MAP_TYPE": self.map_type,
|
||||||
|
"CP_OMNISCIENT": "1" if self.omniscient else "0",
|
||||||
|
"CP_TIMEOUT_SEC": str(self.timeout_sec),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class HarnessClient:
    """One running harness instance speaking JSON Lines over stdin/stdout.

    Cheap to construct (sub-second on macOS once Godot's class cache is
    warm); destroy + recreate on each Gym `reset()` so episodes have
    independent simulator state. Usable as a context manager: leaving the
    `with` block calls `shutdown()`.

    Every request is tagged with a monotonically increasing `id`; the reply
    is the first stdout line that parses as a JSON object carrying the same
    `id`. Reads are blocking: a harness that stays alive but goes silent
    blocks the caller inside `readline()`.
    """

    def __init__(self, config: HarnessConfig | None = None) -> None:
        """Spawn the harness script with `config` layered over `os.environ`."""
        self._config = config or HarnessConfig()
        env = {**os.environ, **self._config.to_env()}
        self._proc = subprocess.Popen(
            ["bash", str(HARNESS_SCRIPT)],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            cwd=str(REPO_ROOT),
            text=True,
            bufsize=1,
            env=env,
        )
        self._next_id = 1
        # True once no further request may be sent (shutdown or dead pipe).
        self._closed = False

    @property
    def config(self) -> HarnessConfig:
        """The configuration this harness was started with."""
        return self._config

    def _send(self, msg: dict[str, Any]) -> dict[str, Any]:
        """Send one request and return the response object with the matching id.

        The caller's `msg` is not modified; the id is added to a copy.

        Raises:
            HarnessError: if the client is closed, the request cannot be
                written, stdout reaches EOF, or no matching response shows
                up within MAX_LINES_PER_RESPONSE lines.
        """
        if self._closed:
            raise HarnessError("harness already closed")
        stdin, stdout = self._proc.stdin, self._proc.stdout
        if stdin is None or stdout is None:
            # Cannot happen with the PIPE arguments used in __init__, but an
            # explicit check survives `python -O`, unlike an assert.
            raise HarnessError("harness pipes are not available")

        request_id = self._next_id
        self._next_id += 1
        payload = json.dumps({**msg, "id": request_id}) + "\n"
        try:
            stdin.write(payload)
            stdin.flush()
        except (OSError, ValueError) as err:
            # Broken pipe / closed file: the harness is gone. Surface it as
            # a HarnessError so callers only deal with one error type.
            self._closed = True
            raise HarnessError(
                f"harness stdin write failed for id={request_id}: {err}"
            ) from err

        for _ in range(MAX_LINES_PER_RESPONSE):
            line = stdout.readline()
            if not line:
                self._closed = True
                raise HarnessError(
                    f"harness stdout EOF while waiting for id={request_id}"
                )
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                # Non-JSON noise that landed on stdout -- skip it.
                continue
            # Notifications (no id, or a different id) and JSON values that
            # are not objects are skipped; only the correlated reply returns.
            if isinstance(obj, dict) and obj.get("id") == request_id:
                return obj
        raise HarnessError(
            f"no correlated response for id={request_id} within {MAX_LINES_PER_RESPONSE} lines"
        )

    def view(self) -> dict[str, Any]:
        """Request the current player view and return its `view` payload.

        Raises:
            HarnessError: if the harness reports failure or the response
                carries no `view` payload.
        """
        r = self._send({"type": "view"})
        if not r.get("ok"):
            raise HarnessError(f"view failed: {r.get('error')}")
        try:
            return r["view"]
        except KeyError as err:
            raise HarnessError("view response has no 'view' payload") from err

    def act(self, action: dict[str, Any]) -> dict[str, Any]:
        """Submit one player action and return the full response object.

        Raises:
            HarnessError: if the harness rejects the action.
        """
        r = self._send({"type": "act", "action": action})
        if not r.get("ok"):
            err = r.get("error", {})
            if isinstance(err, dict):
                detail = f"{err.get('code')}: {err.get('message')}"
            else:
                # Off-contract error payload (string, null, ...): report it
                # verbatim instead of crashing on `.get`.
                detail = repr(err)
            raise HarnessError(f"act({action.get('type')!r}) failed: {detail}")
        return r

    def end_turn(self) -> dict[str, Any]:
        """Convenience wrapper for `act({"type": "end_turn"})`."""
        return self.act({"type": "end_turn"})

    def shutdown(self) -> None:
        """Ask the harness to exit, close the pipes and reap the process.

        Safe to call more than once, and also after a request has already
        failed. If the process has not exited within 5 seconds it is killed.
        """
        if not self._closed:
            # The request must go out BEFORE the closed flag is set --
            # `_send` refuses to talk to a closed client.
            try:
                self._send({"type": "shutdown"})
            except HarnessError:
                # Best effort: the harness may exit without replying, or
                # may already be dead.
                pass
            self._closed = True

        # Closing stdin also delivers EOF to the harness, and closing both
        # ends releases the file descriptors right away.
        for pipe in (self._proc.stdin, self._proc.stdout):
            if pipe is None:
                continue
            try:
                pipe.close()
            except OSError:
                # Flushing into an already-dead process can raise; ignore.
                pass

        if self._proc.poll() is not None:
            return  # already exited and reaped
        try:
            self._proc.wait(timeout=5)
        except subprocess.TimeoutExpired:
            self._proc.kill()
            self._proc.wait(timeout=2)

    def __enter__(self) -> HarnessClient:
        """Return the client itself for use in a `with` statement."""
        return self

    def __exit__(self, *exc: object) -> None:
        """Shut the harness down when the `with` block ends."""
        self.shutdown()
|
||||||
9
tooling/rl-self-play/requirements.txt
Normal file
9
tooling/rl-self-play/requirements.txt
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
# Pinned to versions that are known to compose cleanly with sb3-contrib's
|
||||||
|
# MaskablePPO as of 2026-Q2. Bump together — sb3 and sb3-contrib track in
|
||||||
|
# lockstep, and torch's wheel ABI changes between minor versions.
|
||||||
|
gymnasium==1.2.1
|
||||||
|
stable-baselines3==2.7.0
|
||||||
|
sb3-contrib==2.7.0
|
||||||
|
torch==2.4.1
|
||||||
|
numpy>=2.0,<3
|
||||||
|
tensorboard>=2.18
|
||||||
Loading…
Add table
Reference in a new issue