magicciv/tools/e2e-determinism-check.sh

112 lines
3.3 KiB
Bash
Raw Permalink Normal View History

#!/usr/bin/env bash
# e2e-determinism-check.sh — Post-flight gate for autoplay batch results.
#
# Usage: tools/e2e-determinism-check.sh <results_dir> [count]
#
# Asserts for each seed directory under <results_dir>:
# 1. turn_stats.jsonl exists and has ≥1 line
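# 2. game.log exists and contains no non-allowlisted lines beginning with
# "ERROR:" (the allowlist currently covers the headless texture errors from
# the rendering dummy; TileMap out-of-bounds errors are NOT in
# ALLOWLISTED_ERRORS and will fail the gate)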
#
# Exit codes:
# 0 — all seeds passed
# 1 — one or more seeds failed the gate
# 2 — bad arguments / results_dir not found
# Strict mode: abort on unhandled errors, unset variables and failed pipeline
# stages.
set -euo pipefail

# Positional arguments: the results directory (required) and the expected
# number of seed directories (optional; empty means "do not check").
RESULTS_DIR="${1:-}"
COUNT="${2:-}"

if [ -z "$RESULTS_DIR" ] || [ ! -d "$RESULTS_DIR" ]; then
  echo "Usage: $0 <results_dir> [count]" >&2
  echo "ERROR: results_dir not found: '$RESULTS_DIR'" >&2
  exit 2
fi

# Reject a malformed count up front. Without this, a non-numeric value makes
# the later `[ ... -lt "$COUNT" ]` test error out inside an `if`, which is
# treated as "false" and silently skips the seed-count check.
if [ -n "$COUNT" ] && [[ ! "$COUNT" =~ ^[0-9]+$ ]]; then
  echo "Usage: $0 <results_dir> [count]" >&2
  echo "ERROR: count must be a non-negative integer: '$COUNT'" >&2
  exit 2
fi

# Script errors that are known-benign in headless mode and do NOT indicate
# a game logic failure. Each entry is matched as a literal substring of an
# ERROR line.
ALLOWLISTED_ERRORS=(
  "Parameter \"t\" is null" # headless viewport texture — no display
  "texture_2d_get"          # same; appears in dummy storage callsite
)
#######################################
# Report whether a log line matches any known-benign error pattern.
# Globals:   ALLOWLISTED_ERRORS (read)
# Arguments: $1 - the log line to test
# Returns:   0 if the line contains any allowlist entry as a literal
#            substring, 1 otherwise
#######################################
_is_allowlisted() {
  local line="$1"
  # Keep the loop variable local so it cannot clobber a caller's `pattern`.
  local pattern
  for pattern in "${ALLOWLISTED_ERRORS[@]}"; do
    # The quoted expansion is compared literally; only the surrounding `*`
    # act as wildcards, giving a plain substring test.
    if [[ "$line" == *"$pattern"* ]]; then
      return 0
    fi
  done
  return 1
}
# Every immediate subdirectory of RESULTS_DIR is treated as a seed directory;
# no name or content filter is applied here. The list is sorted so seeds are
# processed in a stable order.
readarray -t SEED_DIRS < <(find "$RESULTS_DIR" -maxdepth 1 -mindepth 1 -type d | sort)

if [[ "${#SEED_DIRS[@]}" -eq 0 ]]; then
  printf '%s\n' "ERROR: no seed directories found under $RESULTS_DIR" >&2
  exit 2
fi

# When an expected count was supplied, finding fewer seed directories than
# that fails the gate; finding more is accepted.
if [ -n "$COUNT" ] && [ "${#SEED_DIRS[@]}" -lt "$COUNT" ]; then
  printf '%s\n' "ERROR: expected $COUNT seed dirs, found ${#SEED_DIRS[@]}" >&2
  exit 1
fi
#######################################
# Collect every non-allowlisted ERROR line from a log file.
# Globals:   script_errors (written) - reset, then filled with offending lines
# Arguments: $1 - path to the log file to scan
# NOTE(review): only lines that begin with "ERROR:" are considered. If the
# engine prefixes script failures differently (e.g. "SCRIPT ERROR:"), those
# lines are not caught here — confirm against real logs.
#######################################
_collect_script_errors() {
  local log_path="$1"
  local log_line
  script_errors=()
  # `|| [[ -n ... ]]` keeps a final line that has no trailing newline; a log
  # cut short by a crash can end on exactly the ERROR line that matters.
  while IFS= read -r log_line || [[ -n "$log_line" ]]; do
    if [[ "$log_line" =~ ^ERROR: ]] && ! _is_allowlisted "$log_line"; then
      script_errors+=("$log_line")
    fi
  done < "$log_path"
}

# Names of the seed directories that failed at least one gate.
FAILURES=()

for seed_dir in "${SEED_DIRS[@]}"; do
  name="$(basename "$seed_dir")"
  stats_file="$seed_dir/turn_stats.jsonl"
  log_file="$seed_dir/game.log"
  fail_reasons=()

  # Gate 1: turn_stats.jsonl must exist and be non-empty.
  if [ ! -f "$stats_file" ]; then
    fail_reasons+=("missing turn_stats.jsonl")
  elif [ ! -s "$stats_file" ]; then
    fail_reasons+=("empty turn_stats.jsonl")
  fi

  # Gate 2: game.log must exist and hold no non-allowlisted ERROR lines.
  if [ ! -f "$log_file" ]; then
    fail_reasons+=("missing game.log")
  else
    _collect_script_errors "$log_file"
    if [ "${#script_errors[@]}" -gt 0 ]; then
      fail_reasons+=("${#script_errors[@]} non-allowlisted script error(s) — first: ${script_errors[0]}")
    fi
  fi

  if [ "${#fail_reasons[@]}" -gt 0 ]; then
    echo "FAIL [$name]: ${fail_reasons[*]}" >&2
    FAILURES+=("$name")
  else
    # awk's NR counts an unterminated final record, whereas `wc -l` counts
    # newline characters only and would report 0 for a one-record file that
    # lacks a trailing newline.
    ts_lines="$(awk 'END { print NR }' "$stats_file")"
    echo "OK [$name]: $ts_lines turn_stats line(s)"
  fi
done
# Final summary: a blank line, then a banner with the pass ratio on stdout.
# The names of any failing seeds go to stderr and turn the exit status to 1.
divider="============================================================"
total_seeds=${#SEED_DIRS[@]}
failed_seeds=${#FAILURES[@]}
PASS=$(( total_seeds - failed_seeds ))

printf '\n'
printf '%s\n' "$divider"
printf '%s\n' "E2E gate: $PASS/$total_seeds passed"
printf '%s\n' "$divider"

if (( failed_seeds > 0 )); then
  printf '%s\n' "FAILED seeds: ${FAILURES[*]}" >&2
  exit 1
fi

exit 0