feat(game): ✨ persist wind_direction for climate fidelity
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
00e98329fa
commit
0763db8e2d
7 changed files with 127 additions and 16 deletions
|
|
@ -2,7 +2,7 @@
|
|||
id: p2-82
|
||||
title: Climate-input save-fidelity — persist (or re-derive) worldgen-static grid inputs across save/load
|
||||
priority: p2
|
||||
status: stub
|
||||
status: done
|
||||
scope: game1
|
||||
updated_at: 2026-06-09
|
||||
blocked_by: []
|
||||
|
|
|
|||
|
|
@ -62,6 +62,13 @@ static func to_dict(tile: Resource) -> Dictionary:
|
|||
data["quality_progress"] = tile.quality_progress
|
||||
if tile.wind_speed != 0.5:
|
||||
data["wind_speed"] = tile.wind_speed
|
||||
# p2-82: wind_direction is set at worldgen (wind_calculator.gd) and read by the
|
||||
# climate physics transport solver (mc-climate/physics.rs upwind_offset) — it
|
||||
# drives temperature/moisture transport and so the surface_water trajectory. It
|
||||
# is NOT re-derived on load, so without persisting it the climate trajectory
|
||||
# diverges after save/load. Default 0 omitted (sparse).
|
||||
if tile.wind_direction != 0:
|
||||
data["wind_direction"] = tile.wind_direction
|
||||
if tile.culture_pressure != 0.0:
|
||||
data["culture_pressure"] = tile.culture_pressure
|
||||
if tile.mana_density != 0.0:
|
||||
|
|
@ -152,6 +159,9 @@ static func from_dict(data: Dictionary) -> Resource: # Tile
|
|||
tile.quality = data.get("quality", 2)
|
||||
tile.quality_progress = data.get("quality_progress", 0)
|
||||
tile.wind_speed = data.get("wind_speed", 0.5)
|
||||
# p2-82: restore worldgen wind_direction (missing in pre-p2-82 saves → 0, the
|
||||
# old behaviour). See to_dict for why this gates climate-trajectory fidelity.
|
||||
tile.wind_direction = data.get("wind_direction", 0)
|
||||
tile.culture_pressure = data.get("culture_pressure", 0.0)
|
||||
tile.mana_density = data.get("mana_density", 0.0)
|
||||
tile.ley_line_count = data.get("ley_line_count", 0)
|
||||
|
|
|
|||
|
|
@ -326,12 +326,13 @@ func test_grid_accumulators_continue_trajectory_when_inputs_preserved() -> void:
|
|||
## A: uninterrupted, non-zero wind. B: run to SAVE_AT, continue on a grid that
|
||||
## carries grid_b's get_tile_dict state forward + the restored accumulators.
|
||||
##
|
||||
## NOTE (documented gap → p2-82): a REAL load does NOT preserve
|
||||
## tile.wind_direction (worldgen-only, not in tile_serializer, not re-derived),
|
||||
## which physics reads for transport (physics.rs:336/399) → surface_water. So
|
||||
## production surface_water still diverges — a pre-existing CLIMATE-INPUT
|
||||
## save-fidelity gap, NOT accumulator persistence. This test holds inputs
|
||||
## constant to isolate the accumulator-fix contribution.
|
||||
## NOTE: the climate INPUTS physics reads (incl. tile.wind_direction, the
|
||||
## transport driver at physics.rs:336/399 → surface_water) now persist via
|
||||
## tile_serializer (p2-82, CLOSED — round-trip locked by
|
||||
## test_climate_tile_sync.gd), so a real load preserves them. This test holds
|
||||
## inputs constant to isolate the accumulator-fix contribution; with both the
|
||||
## accumulators (p2-80) and wind (p2-82) persisted, the production continued
|
||||
## trajectory is byte-identical.
|
||||
var grid_a: RefCounted = WorldsimAccumulatorFixtures.apply_wind_field(_make_terrain_grid())
|
||||
var climate_a: RefCounted = WorldsimAccumulatorFixtures.make_climate_physics()
|
||||
var worldsim_a: RefCounted = WorldsimAccumulatorFixtures.make_bloom_worldsim()
|
||||
|
|
|
|||
|
|
@ -54,9 +54,9 @@ static func make_bloom_worldsim() -> RefCounted:
|
|||
|
||||
static func apply_wind_field(grid: RefCounted) -> RefCounted:
|
||||
## Stamp a deterministic NON-ZERO wind_direction per tile (0-5 hex edges) onto
|
||||
## an existing terrain grid. The continued-trajectory diagnostic needs a wind
|
||||
## field that drives transport, so the post-load wind reset measurably changes
|
||||
## the surface_water trajectory. Returns the same grid for call chaining.
|
||||
## an existing terrain grid. The continued-trajectory test needs a wind field
|
||||
## that drives transport so surface_water actually depends on it; wind now
|
||||
## persists across save/load via tile_serializer (p2-82). Returns the grid.
|
||||
for row: int in range(MAP_SIZE):
|
||||
for col: int in range(MAP_SIZE):
|
||||
var dir: int = (col + row) % 6
|
||||
|
|
@ -74,9 +74,9 @@ static func carry_inputs_forward(source: RefCounted) -> RefCounted:
|
|||
## restore_worldsim_accumulators_from_json is the SOLE source of those fields —
|
||||
## mirroring production, whose _sync_tiles_to_grid omits exactly these. This
|
||||
## keeps the continued-trajectory test non-tautological: the accumulator fix,
|
||||
## not the input carry, is what reproduces them. (A REAL load additionally loses
|
||||
## tile.wind_direction — worldgen-only, not persisted — the documented separate
|
||||
## climate-input save-fidelity gap.)
|
||||
## not the input carry, is what reproduces them. (Climate INPUTS including
|
||||
## tile.wind_direction now persist via tile_serializer — p2-82, closed — so a
|
||||
## real load preserves the inputs this fixture carries forward.)
|
||||
var grid2: RefCounted = GdGridState.create(MAP_SIZE, MAP_SIZE)
|
||||
for row: int in range(MAP_SIZE):
|
||||
for col: int in range(MAP_SIZE):
|
||||
|
|
|
|||
|
|
@ -159,3 +159,36 @@ func test_climate_accumulators_resave_without_play_is_lossless() -> void:
|
|||
payload,
|
||||
"re-save before first played turn must return the loaded payload byte-stable"
|
||||
)
|
||||
|
||||
|
||||
func test_wind_direction_survives_tile_serializer_round_trip() -> void:
|
||||
## p2-82: wind_direction is a worldgen-set climate INPUT the physics transport
|
||||
## solver reads (mc-climate/physics.rs upwind_offset → temperature/moisture →
|
||||
## surface_water). It is not re-derived on load, so it MUST round-trip through
|
||||
## tile_serializer or the climate trajectory diverges after save/load. Combined
|
||||
## with the control proof (carrying wind forward → byte-identical surface_water
|
||||
## trajectory, in test_worldsim_playable_path.gd), this closes the p2-82 gap.
|
||||
var tile: Tile = Tile.new(Vector2i(3, 4), "grassland")
|
||||
tile.wind_direction = 5
|
||||
var data: Dictionary = TileSerializer.to_dict(tile)
|
||||
assert_eq(data.get("wind_direction"), 5, "wind_direction must be serialized")
|
||||
# Route through the REAL save serialization hop (JSON.stringify → parse_string),
|
||||
# which floats every number (5 → 5.0) — the exact hop behind the production_cost
|
||||
# bug. wind_direction lands back in a GDScript int so this must still restore 5.
|
||||
var round_tripped: Dictionary = JSON.parse_string(JSON.stringify(data))
|
||||
var restored: Resource = TileSerializer.from_dict(round_tripped)
|
||||
assert_eq(
|
||||
restored.wind_direction, 5, "wind_direction must restore after a full JSON round-trip"
|
||||
)
|
||||
|
||||
|
||||
func test_wind_direction_default_omitted_and_old_save_reads_zero() -> void:
|
||||
## Sparse + backward-compat: a default (0) wind_direction is omitted from the
|
||||
## dict, and a pre-p2-82 save (no wind_direction key) restores to 0 — the
|
||||
## previous behaviour, no regression for old saves.
|
||||
var tile: Tile = Tile.new(Vector2i.ZERO, "grassland")
|
||||
var data: Dictionary = TileSerializer.to_dict(tile)
|
||||
assert_false(data.has("wind_direction"), "default wind_direction (0) must be omitted")
|
||||
var old_save: Dictionary = {"position": [1, 1], "biome_id": "grassland"}
|
||||
var restored: Resource = TileSerializer.from_dict(old_save)
|
||||
assert_eq(restored.wind_direction, 0, "old save without wind_direction key reads 0")
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ from __future__ import annotations
|
|||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
|
@ -99,11 +100,28 @@ class HarnessClient:
|
|||
def __init__(self, config: HarnessConfig | None = None) -> None:
|
||||
self._config = config or HarnessConfig()
|
||||
env = {**os.environ, **self._config.to_env()}
|
||||
# Capture the Godot subprocess stderr to a file when
|
||||
# MC_HARNESS_STDERR_DIR is set — otherwise DEVNULL (the shipping
|
||||
# default). Without this, a Godot boot/timeout failure under load
|
||||
# leaves only an opaque "stdout EOF" on the Python side with no
|
||||
# reason. Set MC_HARNESS_STDERR_DIR=<dir> for any long training run
|
||||
# so a harness death arrives diagnosed, not as a guess.
|
||||
self._stderr_file = None
|
||||
stderr_dir = os.environ.get("MC_HARNESS_STDERR_DIR", "")
|
||||
if stderr_dir:
|
||||
os.makedirs(stderr_dir, exist_ok=True)
|
||||
self._stderr_file = open(
|
||||
os.path.join(stderr_dir, f"harness_{os.getpid()}_{time.time_ns()}.err"),
|
||||
"w",
|
||||
)
|
||||
stderr_target = self._stderr_file
|
||||
else:
|
||||
stderr_target = subprocess.DEVNULL
|
||||
self._proc = subprocess.Popen(
|
||||
["bash", str(HARNESS_SCRIPT)],
|
||||
stdin=subprocess.PIPE,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.DEVNULL,
|
||||
stderr=stderr_target,
|
||||
cwd=str(REPO_ROOT),
|
||||
text=True,
|
||||
bufsize=1,
|
||||
|
|
@ -223,6 +241,12 @@ class HarnessClient:
|
|||
except subprocess.TimeoutExpired:
|
||||
self._proc.kill()
|
||||
self._proc.wait(timeout=2)
|
||||
if self._stderr_file is not None:
|
||||
try:
|
||||
self._stderr_file.close()
|
||||
except Exception:
|
||||
pass
|
||||
self._stderr_file = None
|
||||
|
||||
def __enter__(self) -> HarnessClient:
|
||||
return self
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ its win rate against this baseline; the policy is considered to have
|
|||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import time
|
||||
from dataclasses import replace
|
||||
from typing import Any
|
||||
|
||||
|
|
@ -189,7 +190,6 @@ class MagicCivEnv(gym.Env[np.ndarray, np.int64]):
|
|||
# dropped them, which would have un-declared the external slots.
|
||||
if seed is not None:
|
||||
cfg = replace(cfg, seed=seed)
|
||||
self._client = HarnessClient(cfg)
|
||||
self._terminated = False
|
||||
self._step_count = 0
|
||||
self._capital_by_player = {}
|
||||
|
|
@ -197,7 +197,18 @@ class MagicCivEnv(gym.Env[np.ndarray, np.int64]):
|
|||
# Every configured slot starts alive. `cfg.players` is the total slot
|
||||
# count (learner + opponents); eliminations prune this set.
|
||||
self._live_players = set(range(int(cfg.players)))
|
||||
view = self._client.view(slot=self._slot_kw)
|
||||
# Bounded retry on the harness spawn + first view. Under heavy
|
||||
# concurrent load (16+ Godot workers in heavy-tests.slice with
|
||||
# CPUWeight=20, plus other jobs on the box), a freshly-spawned Godot
|
||||
# can lose the boot race and EOF on the first wire request — which,
|
||||
# un-retried, aborts a multi-hour training run from a single transient
|
||||
# worker death (observed: gen0 died at the first eval, 9 min in). We
|
||||
# fully reap the dead client and back off before respawning so a
|
||||
# competing worker finishing actually frees resources between tries.
|
||||
# A SYSTEMATIC failure (bad build, missing data) still surfaces: it
|
||||
# exhausts the retries and re-raises, and MC_HARNESS_STDERR_DIR
|
||||
# captures the Godot-side reason.
|
||||
view = self._spawn_with_retry(cfg)
|
||||
# Seed capitals from any cities present at game start. In duel
|
||||
# maps each player begins with a founder, so the capital map is
|
||||
# populated on the first CityFounded event per player (handled
|
||||
|
|
@ -211,6 +222,38 @@ class MagicCivEnv(gym.Env[np.ndarray, np.int64]):
|
|||
self._sync_state(view)
|
||||
return encode_observation(view), {"action_mask": self._cur_mask.copy()}
|
||||
|
||||
def _spawn_with_retry(
|
||||
self, cfg: HarnessConfig, attempts: int = 3
|
||||
) -> dict[str, Any]:
|
||||
"""Spawn the harness and fetch the first view, retrying a transient
|
||||
boot-race EOF. Fully reaps a dead client before respawning, with a
|
||||
backoff so a competing worker can free resources between tries.
|
||||
Re-raises the last HarnessError after exhausting `attempts`."""
|
||||
last_err: HarnessError | None = None
|
||||
for attempt in range(attempts):
|
||||
try:
|
||||
self._client = HarnessClient(cfg)
|
||||
return self._client.view(slot=self._slot_kw)
|
||||
except HarnessError as e:
|
||||
last_err = e
|
||||
# Reap the half-dead client so we don't leak a scope and make
|
||||
# contention worse, then back off (1s, 2s, …) before respawn.
|
||||
if self._client is not None:
|
||||
try:
|
||||
self._client.shutdown()
|
||||
except Exception:
|
||||
pass
|
||||
self._client = None
|
||||
if attempt < attempts - 1:
|
||||
print(
|
||||
f"[MagicCivEnv] harness spawn attempt {attempt + 1}/"
|
||||
f"{attempts} failed ({e}); reaped + retrying",
|
||||
file=sys.stderr, flush=True,
|
||||
)
|
||||
time.sleep(1.0 * (attempt + 1))
|
||||
assert last_err is not None
|
||||
raise last_err
|
||||
|
||||
def step(
|
||||
self, action: np.int64 | int
|
||||
) -> tuple[np.ndarray, float, bool, bool, dict[str, Any]]:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue