magicciv/scripts/autoplay/test_save_resume.sh

178 lines
6.4 KiB
Bash
Raw Normal View History

#!/usr/bin/env bash
# test_save_resume.sh — Byte-identical T100 turn_stats after save-at-T50 + resume.
#
# Runs three headless games on apricot, all with the same seed:
# control — straight run to T100, writes turn_stats.jsonl
# save_run — runs to T50, writes mid_run.save, quits (AUTO_PLAY_SAVE_AT=50)
# resume — loads mid_run.save, runs T51-T100, writes turn_stats.jsonl
#
# Pass: T100 turn_stats line from control == T100 line from resume
# Fail: any game crashes, save missing, or turn_stats differ
#
# Usage (from repo root, apricot must be SSH-reachable):
# AUTOPLAY_HOST=lilith@apricot.lan bash scripts/autoplay/test_save_resume.sh [seed]
#
# Without AUTOPLAY_HOST, runs locally via flatpak (Linux only).
# Strict mode: abort on failing commands, unset variables and failed pipeline stages.
set -euo pipefail
# Absolute path of the directory containing this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Repository root, taken to be two levels above the script directory.
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
# Project directory handed to Godot via --path for local runs.
GAME_DIR="$REPO_ROOT/src/game"
# Seed forwarded to every run as AUTO_PLAY_SEED; first CLI argument, default 42.
SEED="${1:-42}"
# Parent directory holding one sub-directory per invocation of this test.
BASE_DIR="$REPO_ROOT/.local/batches/save_resume_test"
# UTC timestamp, computed once, so all three runs share the same output directory.
STAMP="$(date -u +%Y%m%dT%H%M%SZ)"
TEST_DIR="$BASE_DIR/${STAMP}_seed${SEED}"
# SSH destination for remote runs; empty means the games run locally.
AUTOPLAY_HOST="${AUTOPLAY_HOST:-}"
# Turn at which every run stops (forwarded as AUTO_PLAY_TURN_LIMIT).
TURN_LIMIT=100
# Turn at which the save run writes mid_run.save (forwarded as AUTO_PLAY_SAVE_AT).
SAVE_AT=50
# Duration given to timeout(1) for each game: 3 per turn plus 120 of slack
# (timeout interprets a bare number as seconds).
SAFETY=$(( TURN_LIMIT * 3 + 120 ))
mkdir -p "$TEST_DIR"
# Print a progress message on stdout, tagged with the script name.
# Arguments: $* - message words, joined with single spaces.
log() {
  printf '[test_save_resume] %s\n' "$*"
}
# Report a fatal test failure on stderr and terminate the script with status 1.
# Arguments: $* - reason for the failure.
fail() {
  printf 'FAIL: %s\n' "$*" >&2
  exit 1
}
# Retention: keep only the 3 most recent run dirs under $BASE_DIR.
# Globals:   BASE_DIR (read)
# Outputs:   one log line per removed directory
# Returns:   0 when there is nothing to prune; otherwise the status of the
#            last removal.
prune_old_runs() {
  [[ -d "$BASE_DIR" ]] || return 0
  local -a stale=()
  local dir
  # `ls -t` is used on purpose: it is the simplest way to order by mtime, and
  # the directory names are generated by this script (timestamp + seed).
  # `tail -n +4` drops the three newest entries and keeps the rest.
  mapfile -t stale < <(ls -1dt -- "$BASE_DIR"/*_seed* 2>/dev/null | tail -n +4)
  (( ${#stale[@]} > 0 )) || return 0
  log "pruning ${#stale[@]} old run dir(s) (keeping 3 newest)"
  for dir in "${stale[@]}"; do
    log " rm -rf $dir"
    # `--` stops option parsing should a path ever start with a dash.
    rm -rf -- "$dir"
  done
}
prune_old_runs
# ── Runner helpers ────────────────────────────────────────────────────────────
# Run one headless game on this machine through flatpak.
# Globals:   SEED, TURN_LIMIT, SAFETY, GAME_DIR (read)
# Arguments: $1   - output directory (created if missing); also passed to the
#                   game as AUTO_PLAY_DIR
#            $2.. - extra NAME=VALUE pairs forwarded as flatpak --env flags
# Outputs:   combined stdout/stderr of the game in $1/game.log
# Returns:   always 0; see the note on `|| true` below.
_run_local() {
  local out_dir="$1"; shift
  mkdir -p "$out_dir"
  local -a flatpak_envs=(
    "--env=AUTO_PLAY=true"
    "--env=AUTO_PLAY_DIR=$out_dir"
    "--env=AUTO_PLAY_SEED=$SEED"
    "--env=AUTO_PLAY_TURN_LIMIT=$TURN_LIMIT"
  )
  # Iterate "$@" directly: expanding an empty array trips `set -u` on
  # bash < 4.4, whereas an empty "$@" is always safe.
  local extra
  for extra in "$@"; do
    flatpak_envs+=("--env=$extra")
  done
  # A crash or timeout is deliberately not fatal here: the callers decide
  # pass/fail by checking for the files the game should have written.
  timeout "$SAFETY" flatpak run --user \
    --filesystem=home \
    "${flatpak_envs[@]}" \
    org.godotengine.Godot \
    --path "$GAME_DIR" --headless --rendering-method gl_compatibility \
    >"$out_dir/game.log" 2>&1 || true
}
# Run one headless game on $AUTOPLAY_HOST over SSH and copy its output back.
# Globals:   AUTOPLAY_HOST, HOME, STAMP, SEED, TURN_LIMIT, SAFETY (read)
# Arguments: $1   - local output directory; its basename names the remote
#                   run directory
#            $2.. - extra NAME=VALUE pairs forwarded as flatpak --env flags
# Outputs:   contents of the remote run directory (including game.log)
#            copied into $1
# NOTE(review): remote_dir is built from the *local* $HOME, while the remote
# `cd` below uses the remote $HOME. This looks like it assumes both machines
# share the same home path — confirm before running from a different user.
_run_remote() {
  local out_dir="$1"; shift
  local run_name
  run_name="$(basename "$out_dir")"
  local remote_dir="$HOME/Code/@projects/@magic-civilization/.local/batches/save_resume_test/${STAMP}_seed${SEED}/$run_name"

  # Keep all extra flags on ONE line: a newline inside the remote command
  # would end the flatpak invocation early. Values are single-quoted for the
  # remote shell, matching how remote_dir is quoted below.
  local extra_flags="" extra
  for extra in "$@"; do
    extra_flags+="--env='$extra' "
  done

  ssh "$AUTOPLAY_HOST" "mkdir -p '$remote_dir'"
  # Everything except \$HOME is expanded locally before the command is sent.
  # A crashed or timed-out game is not fatal (`|| true`); callers detect it
  # through the missing output files.
  # shellcheck disable=SC2029
  ssh "$AUTOPLAY_HOST" "
    set -uo pipefail
    cd \$HOME/Code/@projects/@magic-civilization/src/game
    timeout $SAFETY flatpak run --user \
      --filesystem=home \
      --env=AUTO_PLAY=true \
      --env=AUTO_PLAY_DIR='$remote_dir' \
      ${extra_flags}\
      --env=AUTO_PLAY_SEED=$SEED \
      --env=AUTO_PLAY_TURN_LIMIT=$TURN_LIMIT \
      org.godotengine.Godot \
      --path . --headless --rendering-method gl_compatibility \
      >'$remote_dir/game.log' 2>&1 || true
  "
  # Pull results back
  mkdir -p "$out_dir"
  scp -r "$AUTOPLAY_HOST:$remote_dir/." "$out_dir/"
}
# Dispatch one game run to the remote runner when AUTOPLAY_HOST is set,
# otherwise to the local runner.
# Arguments: $1   - output directory for the run
#            $2.. - extra NAME=VALUE pairs handed through to the runner
# Returns:   the status of the selected runner.
run_game() {
  local target_dir="$1"; shift
  local runner=_run_local
  [[ -z "$AUTOPLAY_HOST" ]] || runner=_run_remote
  "$runner" "$target_dir" "$@"
}
# ── Phase 1: control run (T1-T100) ───────────────────────────────────────────
CONTROL_DIR="$TEST_DIR/control"
log "Phase 1: control run seed=$SEED T1-T${TURN_LIMIT} → $CONTROL_DIR"
run_game "$CONTROL_DIR"
[[ -f "$CONTROL_DIR/turn_stats.jsonl" ]] || fail "control: turn_stats.jsonl missing"
# `|| true`: with `set -e` + `pipefail` a grep that matches nothing would abort
# the script right here, before the explicit check below can report why.
# The pattern relies on the compact `"turn":N,` layout of turn_stats.jsonl.
CONTROL_T100=$(grep -F -- "\"turn\":${TURN_LIMIT}," "$CONTROL_DIR/turn_stats.jsonl" | tail -n 1 || true)
[[ -n "$CONTROL_T100" ]] || fail "control: no T${TURN_LIMIT} line in turn_stats.jsonl"
log "control T${TURN_LIMIT} line: $CONTROL_T100"
# ── Phase 2: save run (T1-T50, writes mid_run.save) ──────────────────────────
SAVE_DIR_PATH="$TEST_DIR/save_run"
log "Phase 2: save run seed=$SEED T1-T${SAVE_AT} → $SAVE_DIR_PATH"
run_game "$SAVE_DIR_PATH" "AUTO_PLAY_SAVE_AT=$SAVE_AT"
[[ -f "$SAVE_DIR_PATH/mid_run.save" ]] || fail "save_run: mid_run.save not written"
log "mid_run.save present ($(wc -c < "$SAVE_DIR_PATH/mid_run.save") bytes)"
# Determine save path accessible to the resume run.
# Remote runs must load the copy that lives on the remote host; the path is
# built the same way _run_remote builds its run directory (from the local
# $HOME — NOTE(review): assumes local and remote home paths match; confirm).
if [[ -n "$AUTOPLAY_HOST" ]]; then
  REMOTE_BASE="$HOME/Code/@projects/@magic-civilization/.local/batches/save_resume_test/${STAMP}_seed${SEED}"
  RESUME_SAVE_PATH="$REMOTE_BASE/save_run/mid_run.save"
else
  RESUME_SAVE_PATH="$SAVE_DIR_PATH/mid_run.save"
fi
# ── Phase 3: resume run (load T50 save, play T51-T100) ───────────────────────
RESUME_DIR="$TEST_DIR/resume"
# Braces matter: a bare $SAVE_AT followed by "T100" would be read as the
# unset variable SAVE_ATT100 and abort under `set -u`.
log "Phase 3: resume run seed=$SEED T${SAVE_AT}-T${TURN_LIMIT} → $RESUME_DIR"
run_game "$RESUME_DIR" \
  "AUTO_PLAY_LOAD_AUTOSAVE=$RESUME_SAVE_PATH"
[[ -f "$RESUME_DIR/turn_stats.jsonl" ]] || fail "resume: turn_stats.jsonl missing"
RESUME_T100=$(grep -F -- "\"turn\":${TURN_LIMIT}," "$RESUME_DIR/turn_stats.jsonl" | tail -n 1 || true)
[[ -n "$RESUME_T100" ]] || fail "resume: no T${TURN_LIMIT} line in turn_stats.jsonl"
log "resume T${TURN_LIMIT} line: $RESUME_T100"
# ── Diff ──────────────────────────────────────────────────────────────────────
# Normalise one turn_stats record: read a single JSON object from stdin, drop
# its wall_clock_sec key (timing is non-deterministic) and print the rest as
# one line of JSON with sorted keys.
strip_timing() {
  python3 -c '
import json
import sys

record = json.load(sys.stdin)
record.pop("wall_clock_sec", None)
sys.stdout.write(json.dumps(record, sort_keys=True) + "\n")
'
}
# Normalise both T100 records, then compare them as plain strings.
CONTROL_NORM=$(strip_timing <<<"$CONTROL_T100")
RESUME_NORM=$(strip_timing <<<"$RESUME_T100")
if [[ "$CONTROL_NORM" != "$RESUME_NORM" ]]; then
  # Mismatch: show both records pretty-printed, then a line diff of the two.
  log "FAIL: T100 turn_stats differ"
  printf '%s\n' "--- control ---"
  python3 -m json.tool <<<"$CONTROL_NORM"
  printf '%s\n' "--- resume ---"
  python3 -m json.tool <<<"$RESUME_NORM"
  # diff exits non-zero when the inputs differ, which is expected here.
  diff <(python3 -m json.tool <<<"$CONTROL_NORM") \
    <(python3 -m json.tool <<<"$RESUME_NORM") || true
  exit 1
fi
# Match: keep the verified record next to the run outputs.
log "PASS: T100 turn_stats byte-identical between control and save-resume runs"
printf '%s\n' "$CONTROL_NORM" > "$TEST_DIR/t100_verified.json"
exit 0