magicciv/tools/determinism-audit.sh

#!/usr/bin/env bash
# determinism-audit.sh — Audit AI determinism across the three canonical scenarios.
#
# TODO: re-run when RUN-host environment stabilizes. As of 2026-04-17, the RUN
# host has a `class_name Diplomacy` collision preventing TurnManager from
# compiling, so all autoplay games exit in_progress around turn 61. No
# determinism signal can be extracted until that's resolved. See task #5 /
# blocker thread.
#
# Scenarios (p0-20 / task T3):
#   1. CPU -> CPU: same seed twice with AI_GPU_ROLLOUT=false. Diff must be empty
#      (modulo timing/metadata allowlist).
#   2. CPU -> GPU: same seed on AI_GPU_ROLLOUT=false and =true. Integer fields
#      must match byte-for-byte; scalar floats within 1e-4.
#   3. Parallel batch: PARALLEL=SEED_COUNT (minimum 2) twice — per-seed dirs identical (modulo timing).
#   (Process-restart determinism is OUT OF SCOPE here — owned by p1-09 autosave.
#    Do NOT add to this audit. See .project/objectives/p1-09-*.md when that lands.)
#
# Drives the RUN host over SSH (requires AUTOPLAY_HOST / PROJECT_ROOT_REMOTE).
# Writes report to .local/iter/determinism-audit-<timestamp>/summary.md on EDIT host.
#
# Exit codes:
#   0 — all enabled scenarios passed
#   1 — one or more scenarios failed
#   2 — usage / env error

set -euo pipefail

# Directory containing this script, and its parent, which is used as the
# project directory for the local report output.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"

# Required environment. These are checked explicitly rather than with
# `${VAR:?msg}`: that form exits with bash's generic failure status (1 in
# practice), which collides with "one or more scenarios failed". An env error
# must exit 2, as the header documents.
if [ -z "${AUTOPLAY_HOST:-}" ]; then
    echo "$0: AUTOPLAY_HOST must be set (e.g. lilith@apricot.lan)" >&2
    exit 2
fi
if [ -z "${PROJECT_ROOT_REMOTE:-}" ]; then
    echo "$0: PROJECT_ROOT_REMOTE must be set (repo path on RUN host)" >&2
    exit 2
fi

# One timestamp names both the local and the remote audit directory so the
# two sides of a run can be matched up afterwards.
STAMP="$(date +%Y%m%d_%H%M%S)"
AUDIT_DIR_LOCAL="$PROJECT_DIR/.local/iter/determinism-audit-$STAMP"
AUDIT_DIR_REMOTE="$PROJECT_ROOT_REMOTE/.local/iter/determinism-audit-$STAMP"
SUMMARY="$AUDIT_DIR_LOCAL/summary.md"

mkdir -p "$AUDIT_DIR_LOCAL"

# Per-scenario switches. Each defaults to "true"; set any of them to another
# value in the environment to skip that scenario.
: "${RUN_CPU_CPU:=true}"
: "${RUN_CPU_GPU:=true}"
: "${RUN_PARALLEL:=true}"

# Batch sizing and the float comparison tolerance, likewise env-overridable.
: "${SEED_COUNT:=3}"
: "${TURN_LIMIT:=150}"
: "${FLOAT_TOL:=0.0001}"

# Titles of the scenarios that were recorded as FAIL.
FAILURES=()

# Start the report from scratch (truncating any existing file) with a title
# line and the parameters of this run.
{
    printf '# Determinism Audit — %s\n\n' "$STAMP"
    printf -- '- Host: `%s`\n' "$AUTOPLAY_HOST"
    printf -- '- Seeds: %s, turn_limit: %s, float tolerance: %s\n\n' \
        "$SEED_COUNT" "$TURN_LIMIT" "$FLOAT_TOL"
} > "$SUMMARY"

_batch_remote() {
    # Run one autoplay batch on the RUN host over SSH.
    #   $1  remote results directory (created if missing)
    #   $2  value passed to the batch script as PARALLEL
    #   $3  extra "VAR=value ..." assignments prefixed to the batch command
    # The batch's stdout and stderr land in <results dir>/batch.log on the
    # RUN host. The function's status is whatever ssh returns.
    local results_dir="$1" jobs="$2" env_prefix="$3"
    local remote_script
    remote_script="
        mkdir -p '$results_dir'
        $env_prefix PARALLEL=$jobs bash '$PROJECT_ROOT_REMOTE/tools/autoplay-batch.sh' \
            $SEED_COUNT $TURN_LIMIT '$results_dir' > '$results_dir/batch.log' 2>&1
    "
    ssh "$AUTOPLAY_HOST" "$remote_script"
}

_diff_remote() {
    # Recursively diff two result trees on the RUN host.
    #   $1, $2  remote directories to compare
    #   $3      remote file that receives diff's stdout and stderr
    # game.log, batch.log, weston.log and meta.json are left out of the
    # comparison: game.log and meta.json carry wall-clock/PID/timestamp data
    # that legitimately varies, whereas turn_stats.jsonl and events.jsonl are
    # the deterministic signals. diff's own exit status is discarded
    # (`|| true`), so callers judge the result by the output file's contents.
    local left="$1" right="$2" diff_out="$3"
    local remote_script
    remote_script="
        diff -r \
            --exclude='game.log' \
            --exclude='batch.log' \
            --exclude='weston.log' \
            --exclude='meta.json' \
            '$left' '$right' > '$diff_out' 2>&1 || true
    "
    ssh "$AUTOPLAY_HOST" "$remote_script"
}

_fetch_remote_file() {
    # Copy one remote file back to the EDIT host.
    #   $1  path of the file on AUTOPLAY_HOST
    #   $2  local destination path
    # Best-effort: always returns 0, so a failed copy never aborts the audit
    # under `set -e`. The failure is not silent, though: scp's stderr is
    # relayed as a warning, because a caller that only tests the local copy
    # with `[ -s ... ]` cannot tell "never fetched" from "fetched but empty".
    local remote_path="$1" local_path="$2"
    local scp_err
    # Capture stderr only; scp's stdout is discarded.
    if ! scp_err="$(scp "$AUTOPLAY_HOST:$remote_path" "$local_path" 2>&1 >/dev/null)"; then
        printf 'warning: failed to fetch %s:%s to %s: %s\n' \
            "$AUTOPLAY_HOST" "$remote_path" "$local_path" \
            "${scp_err:-scp exited non-zero}" >&2
    fi
    return 0
}

_record() {
    # Append one scenario section to the report and remember failures.
    #   $1  scenario title (also appended to FAILURES when the verdict is FAIL)
    #   $2  verdict: PASS or FAIL
    #   $3  detail text; may span several lines
    # printf is used instead of echo so the detail text is written verbatim
    # even when it looks like an echo option (for example "-n").
    local scenario="$1" verdict="$2" detail="$3"
    printf '## %s — %s\n\n%s\n\n' "$scenario" "$verdict" "$detail" >> "$SUMMARY"
    if [ "$verdict" = "FAIL" ]; then
        FAILURES+=("$scenario")
    fi
}

# ── Scenario 1: CPU → CPU (serial, PARALLEL=1) ──────────────────────────────
# Run serially so Scenario 1 isolates RNG determinism from parallel dispatch.
# Scenario 3 covers parallel dispatch determinism separately.
#
# Verdict: two identically configured batches are diffed on the RUN host and
# the diff output is copied back. An empty copy is a PASS, a non-empty copy a
# FAIL. A copy that is missing altogether is also a FAIL: the fetch helper is
# best-effort, so an absent file means the comparison result never arrived,
# and `[ -s ... ]` alone would misreport that as a PASS.
if [ "$RUN_CPU_CPU" = "true" ]; then
    echo "Running Scenario 1: CPU → CPU (serial)..."
    _batch_remote "$AUDIT_DIR_REMOTE/cpu-cpu-run1" 1 "AI_USE_MCTS=true AI_GPU_ROLLOUT=false"
    _batch_remote "$AUDIT_DIR_REMOTE/cpu-cpu-run2" 1 "AI_USE_MCTS=true AI_GPU_ROLLOUT=false"
    _diff_remote "$AUDIT_DIR_REMOTE/cpu-cpu-run1" "$AUDIT_DIR_REMOTE/cpu-cpu-run2" \
        "$AUDIT_DIR_REMOTE/cpu-cpu.diff"
    _fetch_remote_file "$AUDIT_DIR_REMOTE/cpu-cpu.diff" "$AUDIT_DIR_LOCAL/cpu-cpu.diff"
    if [ ! -e "$AUDIT_DIR_LOCAL/cpu-cpu.diff" ]; then
        # No local copy at all: the diff was never produced or never fetched.
        _record "Scenario 1 — CPU → CPU" "FAIL" \
            "Could not retrieve \`cpu-cpu.diff\` from the RUN host — no comparison result available."
    elif [ -s "$AUDIT_DIR_LOCAL/cpu-cpu.diff" ]; then
        _record "Scenario 1 — CPU → CPU" "FAIL" \
"Non-empty diff — see \`cpu-cpu.diff\`. First lines:

\`\`\`
$(head -20 "$AUDIT_DIR_LOCAL/cpu-cpu.diff")
\`\`\`"
    else
        _record "Scenario 1 — CPU → CPU" "PASS" "Empty diff across $SEED_COUNT seed(s) (serial)."
    fi
fi

# ── Scenario 2: CPU → GPU (integer-byte-equal + float tolerance) ────────────
# The same seeds are played once with AI_GPU_ROLLOUT=false and once with
# AI_GPU_ROLLOUT=true. Integer fields (pop, gold, winner_id, etc.) must match
# byte-for-byte, while scalar floats may differ by up to FLOAT_TOL. The
# per-seed tolerance check is delegated to determinism-compare.py (lives
# alongside this script); its exit status decides the verdict.
if [ "$RUN_CPU_GPU" = "true" ]; then
    echo "Running Scenario 2: CPU → GPU..."
    _batch_remote "$AUDIT_DIR_REMOTE/cpu-gpu-cpu" "$SEED_COUNT" "AI_USE_MCTS=true AI_GPU_ROLLOUT=false"
    _batch_remote "$AUDIT_DIR_REMOTE/cpu-gpu-gpu" "$SEED_COUNT" "AI_USE_MCTS=true AI_GPU_ROLLOUT=true"
    # Capture the comparator's exit status without letting errexit abort the
    # script when the comparison reports a mismatch.
    cpu_gpu_status=0
    ssh "$AUTOPLAY_HOST" "python3 '$PROJECT_ROOT_REMOTE/tools/determinism-compare.py' \
        '$AUDIT_DIR_REMOTE/cpu-gpu-cpu' '$AUDIT_DIR_REMOTE/cpu-gpu-gpu' \
        --float-tol $FLOAT_TOL > '$AUDIT_DIR_REMOTE/cpu-gpu.report' 2>&1" || cpu_gpu_status=$?
    _fetch_remote_file "$AUDIT_DIR_REMOTE/cpu-gpu.report" "$AUDIT_DIR_LOCAL/cpu-gpu.report"
    if [ "$cpu_gpu_status" -eq 0 ]; then
        _record "Scenario 2 — CPU → GPU" "PASS" \
            "Integer fields byte-equal; floats within $FLOAT_TOL across $SEED_COUNT seeds."
    else
        # Quote the head of the report when a non-empty local copy exists;
        # otherwise just point the reader at the file name.
        if [ -s "$AUDIT_DIR_LOCAL/cpu-gpu.report" ]; then
            report_snippet="$(head -10 "$AUDIT_DIR_LOCAL/cpu-gpu.report")"
        else
            report_snippet="(see cpu-gpu.report)"
        fi
        _record "Scenario 2 — CPU → GPU" "FAIL" \
"Parity check failed (exit $cpu_gpu_status):

\`\`\`
$report_snippet
\`\`\`"
    fi
fi

# ── Scenario 3: Parallel batch determinism ──────────────────────────────────
# PARALLEL=SEED_COUNT (raised to 2 when SEED_COUNT is below 2) dispatches the
# seeds concurrently; run twice and diff. If batch output is order-sensitive,
# seed ranges or RNG state-leakage will produce divergent turn_stats.jsonl
# across the two runs.
#
# Verdict: an empty fetched diff is a PASS, a non-empty one a FAIL. A diff
# file that is missing locally is also a FAIL: the fetch helper is
# best-effort, so an absent file means the comparison result never arrived,
# and `[ -s ... ]` alone would misreport that as a PASS.
if [ "$RUN_PARALLEL" = "true" ]; then
    par=$SEED_COUNT
    # Keep at least two concurrent jobs so the run is genuinely parallel.
    if [ "$par" -lt 2 ]; then
        par=2
    fi
    echo "Running Scenario 3: Parallel batch (PARALLEL=$par)..."
    _batch_remote "$AUDIT_DIR_REMOTE/parallel-run1" "$par" "AI_USE_MCTS=true AI_GPU_ROLLOUT=false"
    _batch_remote "$AUDIT_DIR_REMOTE/parallel-run2" "$par" "AI_USE_MCTS=true AI_GPU_ROLLOUT=false"
    _diff_remote "$AUDIT_DIR_REMOTE/parallel-run1" "$AUDIT_DIR_REMOTE/parallel-run2" \
        "$AUDIT_DIR_REMOTE/parallel.diff"
    _fetch_remote_file "$AUDIT_DIR_REMOTE/parallel.diff" "$AUDIT_DIR_LOCAL/parallel.diff"
    if [ ! -e "$AUDIT_DIR_LOCAL/parallel.diff" ]; then
        # No local copy at all: the diff was never produced or never fetched.
        _record "Scenario 3 — Parallel (PARALLEL=$par)" "FAIL" \
            "Could not retrieve \`parallel.diff\` from the RUN host — no comparison result available."
    elif [ -s "$AUDIT_DIR_LOCAL/parallel.diff" ]; then
        _record "Scenario 3 — Parallel (PARALLEL=$par)" "FAIL" \
"Parallel batch diverged between runs — order-of-dispatch RNG leak? First lines:

\`\`\`
$(head -20 "$AUDIT_DIR_LOCAL/parallel.diff")
\`\`\`"
    else
        _record "Scenario 3 — Parallel (PARALLEL=$par)" "PASS" \
            "Parallel dispatch is deterministic across $SEED_COUNT seeds."
    fi
fi

# (Process-restart determinism lives in p1-09 / autosave — do not add here.)

# ── Summary ──────────────────────────────────────────────────────────────────
# Close the report with a horizontal rule and the overall verdict, echo the
# outcome to the terminal (stderr on failure), and exit 0 when nothing failed
# or 1 when at least one scenario was recorded as FAIL.
failed_count="${#FAILURES[@]}"
printf '\n%s\n\n' '---' >> "$SUMMARY"
if (( failed_count > 0 )); then
    printf '%s\n' "**Result: FAIL — ${failed_count} scenario(s): ${FAILURES[*]}**" >> "$SUMMARY"
    printf '%s\n' "Determinism audit FAILED — report: $SUMMARY" >&2
    exit 1
fi
printf '%s\n' "**Result: ALL PASS**" >> "$SUMMARY"
printf '%s\n' "Determinism audit PASSED — report: $SUMMARY"
exit 0
feat(@projects/@magic-civilization): ✨ implement wonder-tracking ai evaluation Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 01:45:23 -07:00			`#!/usr/bin/env bash`
fix(@projects/@magic-civilization): 🐛 audit ai gpu pipeline for determinism Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 02:21:14 -07:00			`# determinism-audit.sh — Audit AI determinism across the three canonical scenarios.`
			`#`
			`# TODO: re-run when RUN-host environment stabilizes. As of 2026-04-17, the RUN`
			# host has a `class_name Diplomacy` collision preventing TurnManager from
			`# compiling, so all autoplay games exit in_progress around turn 61. No`
			`# determinism signal can be extracted until that's resolved. See task #5 /`
			`# blocker thread.`
feat(@projects/@magic-civilization): ✨ implement wonder-tracking ai evaluation Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 01:45:23 -07:00			`#`
			`# Scenarios (p0-20 / task T3):`
			`# 1. CPU -> CPU: same seed twice with AI_GPU_ROLLOUT=false. Diff must be empty`
			`# (modulo timing/metadata allowlist).`
			`# 2. CPU -> GPU: same seed on AI_GPU_ROLLOUT=false and =true. Integer fields`
			`# must match byte-for-byte; scalar floats within 1e-4.`
			`# 3. Parallel batch: PARALLEL=10 twice — per-seed dirs identical (modulo timing).`
			`# (Process-restart determinism is OUT OF SCOPE here — owned by p1-09 autosave.`
			`# Do NOT add to this audit. See .project/objectives/p1-09-*.md when that lands.)`
			`#`
			`# Runs ON the RUN host via SSH (requires AUTOPLAY_HOST / PROJECT_ROOT_REMOTE).`
			`# Writes report to .local/iter/determinism-audit-<timestamp>/summary.md on EDIT host.`
			`#`
			`# Exit codes:`
			`# 0 — all enabled scenarios passed`
			`# 1 — one or more scenarios failed`
			`# 2 — usage / env error`

			`set -euo pipefail`

			`SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"`
			`PROJECT_DIR="$(dirname "$SCRIPT_DIR")"`

fix(@projects): 🐛 update deployment and guide workflows Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-06-10 03:38:03 -07:00			`: "${AUTOPLAY_HOST:?AUTOPLAY_HOST must be set (e.g. lilith@apricot.lan)}"`
feat(@projects/@magic-civilization): ✨ implement wonder-tracking ai evaluation Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 01:45:23 -07:00			`: "${PROJECT_ROOT_REMOTE:?PROJECT_ROOT_REMOTE must be set (repo path on RUN host)}"`

			`STAMP="$(date +%Y%m%d_%H%M%S)"`
			`AUDIT_DIR_LOCAL="$PROJECT_DIR/.local/iter/determinism-audit-$STAMP"`
			`AUDIT_DIR_REMOTE="$PROJECT_ROOT_REMOTE/.local/iter/determinism-audit-$STAMP"`
			`SUMMARY="$AUDIT_DIR_LOCAL/summary.md"`

			`mkdir -p "$AUDIT_DIR_LOCAL"`

			`# Scenario toggles (env-overridable so we can run scenarios individually)`
			`RUN_CPU_CPU="${RUN_CPU_CPU:-true}"`
			`RUN_CPU_GPU="${RUN_CPU_GPU:-true}"`
			`RUN_PARALLEL="${RUN_PARALLEL:-true}"`

			`SEED_COUNT="${SEED_COUNT:-3}"`
			`TURN_LIMIT="${TURN_LIMIT:-150}"`
			`FLOAT_TOL="${FLOAT_TOL:-0.0001}"`

			`FAILURES=()`

			`echo "# Determinism Audit — $STAMP" > "$SUMMARY"`
			`echo "" >> "$SUMMARY"`
			echo "- Host: \`$AUTOPLAY_HOST\`" >> "$SUMMARY"
			`echo "- Seeds: $SEED_COUNT, turn_limit: $TURN_LIMIT, float tolerance: $FLOAT_TOL" >> "$SUMMARY"`
			`echo "" >> "$SUMMARY"`

			`_batch_remote() {`
fix(@projects/@magic-civilization): 🐛 audit ai gpu pipeline for determinism Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 02:21:14 -07:00			`# $1 remote_results_dir, $2 parallelism, $3 extra_env`
feat(@projects/@magic-civilization): ✨ implement wonder-tracking ai evaluation Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 01:45:23 -07:00			`local remote_dir="$1"`
fix(@projects/@magic-civilization): 🐛 audit ai gpu pipeline for determinism Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 02:21:14 -07:00			`local par="$2"`
			`local extra_env="$3"`
feat(@projects/@magic-civilization): ✨ implement wonder-tracking ai evaluation Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 01:45:23 -07:00			`ssh "$AUTOPLAY_HOST" "`
			`mkdir -p '$remote_dir'`
fix(@projects/@magic-civilization): 🐛 audit ai gpu pipeline for determinism Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 02:21:14 -07:00			`$extra_env PARALLEL=$par bash '$PROJECT_ROOT_REMOTE/tools/autoplay-batch.sh' \`
feat(@projects/@magic-civilization): ✨ implement wonder-tracking ai evaluation Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 01:45:23 -07:00			`$SEED_COUNT $TURN_LIMIT '$remote_dir' > '$remote_dir/batch.log' 2>&1`
			`"`
			`}`

			`_diff_remote() {`
fix(@projects/@magic-civilization): 🐛 audit ai gpu pipeline for determinism Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 02:21:14 -07:00			`# $1 dir_a, $2 dir_b, $3 output diff path — excludes timing/log/stamp fields`
			`# turn_stats.jsonl and events.jsonl are the deterministic signals; game.log`
			`# and meta.json carry wall-clock/PID/timestamp data that legitimately varies.`
feat(@projects/@magic-civilization): ✨ implement wonder-tracking ai evaluation Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 01:45:23 -07:00			`local a="$1" b="$2" out="$3"`
			`ssh "$AUTOPLAY_HOST" "`
			`diff -r \`
			`--exclude='game.log' \`
			`--exclude='batch.log' \`
			`--exclude='weston.log' \`
			`--exclude='meta.json' \`
			`'$a' '$b' > '$out' 2>&1 \|\| true`
			`"`
			`}`

fix(@projects/@magic-civilization): 🐛 audit ai gpu pipeline for determinism Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 02:21:14 -07:00			`_fetch_remote_file() {`
			`# Pull a single remote text file back to EDIT host. Swallows errors since`
			`# the scenario assertion is what matters; a missing diff file is handled`
			# upstream via `[ -s ... ]`.
			`scp "$AUTOPLAY_HOST:$1" "$2" >/dev/null 2>&1 \|\| true`
feat(@projects/@magic-civilization): ✨ implement wonder-tracking ai evaluation Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 01:45:23 -07:00			`}`

			`_record() {`
			`# $1 scenario, $2 PASS\|FAIL, $3 detail`
			`echo "## $1 — $2" >> "$SUMMARY"`
			`echo "" >> "$SUMMARY"`
			`echo "$3" >> "$SUMMARY"`
			`echo "" >> "$SUMMARY"`
			`if [ "$2" = "FAIL" ]; then`
			`FAILURES+=("$1")`
			`fi`
			`}`

fix(@projects/@magic-civilization): 🐛 audit ai gpu pipeline for determinism Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 02:21:14 -07:00			`# ── Scenario 1: CPU → CPU (serial, PARALLEL=1) ──────────────────────────────`
			`# Run serially so Scenario 1 isolates RNG determinism from parallel dispatch.`
			`# Scenario 3 covers parallel dispatch determinism separately.`
feat(@projects/@magic-civilization): ✨ implement wonder-tracking ai evaluation Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 01:45:23 -07:00			`if [ "$RUN_CPU_CPU" = "true" ]; then`
fix(@projects/@magic-civilization): 🐛 audit ai gpu pipeline for determinism Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 02:21:14 -07:00			`echo "Running Scenario 1: CPU → CPU (serial)..."`
			`_batch_remote "$AUDIT_DIR_REMOTE/cpu-cpu-run1" 1 "AI_USE_MCTS=true AI_GPU_ROLLOUT=false"`
			`_batch_remote "$AUDIT_DIR_REMOTE/cpu-cpu-run2" 1 "AI_USE_MCTS=true AI_GPU_ROLLOUT=false"`
feat(@projects/@magic-civilization): ✨ implement wonder-tracking ai evaluation Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 01:45:23 -07:00			`_diff_remote "$AUDIT_DIR_REMOTE/cpu-cpu-run1" "$AUDIT_DIR_REMOTE/cpu-cpu-run2" \`
			`"$AUDIT_DIR_REMOTE/cpu-cpu.diff"`
fix(@projects/@magic-civilization): 🐛 audit ai gpu pipeline for determinism Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 02:21:14 -07:00			`_fetch_remote_file "$AUDIT_DIR_REMOTE/cpu-cpu.diff" "$AUDIT_DIR_LOCAL/cpu-cpu.diff"`
feat(@projects/@magic-civilization): ✨ implement wonder-tracking ai evaluation Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 01:45:23 -07:00			`if [ -s "$AUDIT_DIR_LOCAL/cpu-cpu.diff" ]; then`
			`_record "Scenario 1 — CPU → CPU" "FAIL" \`
fix(@projects/@magic-civilization): 🐛 audit ai gpu pipeline for determinism Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 02:21:14 -07:00			"Non-empty diff — see \`cpu-cpu.diff\`. First lines:

			\`\`\`
			`$(head -20 "$AUDIT_DIR_LOCAL/cpu-cpu.diff")`
			\`\`\`"
feat(@projects/@magic-civilization): ✨ implement wonder-tracking ai evaluation Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 01:45:23 -07:00			`else`
fix(@projects/@magic-civilization): 🐛 audit ai gpu pipeline for determinism Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 02:21:14 -07:00			`_record "Scenario 1 — CPU → CPU" "PASS" "Empty diff across $SEED_COUNT seed(s) (serial)."`
feat(@projects/@magic-civilization): ✨ implement wonder-tracking ai evaluation Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 01:45:23 -07:00			`fi`
			`fi`

fix(@projects/@magic-civilization): 🐛 audit ai gpu pipeline for determinism Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 02:21:14 -07:00			`# ── Scenario 2: CPU → GPU (integer-byte-equal + float tolerance) ────────────`
			`# Integer fields (pop, gold, winner_id, etc.) must match byte-for-byte; scalar`
			`# floats are allowed to diverge within FLOAT_TOL. Delegates the per-seed`
			`# tolerance check to determinism-compare.py (lives alongside this script).`
feat(@projects/@magic-civilization): ✨ implement wonder-tracking ai evaluation Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 01:45:23 -07:00			`if [ "$RUN_CPU_GPU" = "true" ]; then`
			`echo "Running Scenario 2: CPU → GPU..."`
fix(@projects/@magic-civilization): 🐛 audit ai gpu pipeline for determinism Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 02:21:14 -07:00			`_batch_remote "$AUDIT_DIR_REMOTE/cpu-gpu-cpu" "$SEED_COUNT" "AI_USE_MCTS=true AI_GPU_ROLLOUT=false"`
			`_batch_remote "$AUDIT_DIR_REMOTE/cpu-gpu-gpu" "$SEED_COUNT" "AI_USE_MCTS=true AI_GPU_ROLLOUT=true"`
			`set +e`
feat(@projects/@magic-civilization): ✨ implement wonder-tracking ai evaluation Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 01:45:23 -07:00			`ssh "$AUTOPLAY_HOST" "python3 '$PROJECT_ROOT_REMOTE/tools/determinism-compare.py' \`
			`'$AUDIT_DIR_REMOTE/cpu-gpu-cpu' '$AUDIT_DIR_REMOTE/cpu-gpu-gpu' \`
fix(@projects/@magic-civilization): 🐛 audit ai gpu pipeline for determinism Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 02:21:14 -07:00			`--float-tol $FLOAT_TOL > '$AUDIT_DIR_REMOTE/cpu-gpu.report' 2>&1"`
			`cpu_gpu_status=$?`
			`set -e`
			`_fetch_remote_file "$AUDIT_DIR_REMOTE/cpu-gpu.report" "$AUDIT_DIR_LOCAL/cpu-gpu.report"`
feat(@projects/@magic-civilization): ✨ implement wonder-tracking ai evaluation Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 01:45:23 -07:00			`if [ "$cpu_gpu_status" -ne 0 ]; then`
fix(@projects/@magic-civilization): 🐛 audit ai gpu pipeline for determinism Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 02:21:14 -07:00			`report_snippet="(see cpu-gpu.report)"`
			`[ -s "$AUDIT_DIR_LOCAL/cpu-gpu.report" ] && \`
			`report_snippet="$(head -10 "$AUDIT_DIR_LOCAL/cpu-gpu.report")"`
feat(@projects/@magic-civilization): ✨ implement wonder-tracking ai evaluation Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 01:45:23 -07:00			`_record "Scenario 2 — CPU → GPU" "FAIL" \`
fix(@projects/@magic-civilization): 🐛 audit ai gpu pipeline for determinism Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 02:21:14 -07:00			`"Parity check failed (exit $cpu_gpu_status):`

			\`\`\`
			`$report_snippet`
			\`\`\`"
feat(@projects/@magic-civilization): ✨ implement wonder-tracking ai evaluation Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 01:45:23 -07:00			`else`
fix(@projects/@magic-civilization): 🐛 audit ai gpu pipeline for determinism Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 02:21:14 -07:00			`_record "Scenario 2 — CPU → GPU" "PASS" \`
			`"Integer fields byte-equal; floats within $FLOAT_TOL across $SEED_COUNT seeds."`
feat(@projects/@magic-civilization): ✨ implement wonder-tracking ai evaluation Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 01:45:23 -07:00			`fi`
			`fi`

			`# ── Scenario 3: Parallel batch determinism ──────────────────────────────────`
fix(@projects/@magic-civilization): 🐛 audit ai gpu pipeline for determinism Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 02:21:14 -07:00			`# PARALLEL=SEED_COUNT dispatches all seeds concurrently; run twice and diff.`
			`# If batch output is order-sensitive, seed ranges or RNG state-leakage will`
			`# produce divergent turn_stats.jsonl across the two runs.`
feat(@projects/@magic-civilization): ✨ implement wonder-tracking ai evaluation Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 01:45:23 -07:00			`if [ "$RUN_PARALLEL" = "true" ]; then`
fix(@projects/@magic-civilization): 🐛 audit ai gpu pipeline for determinism Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 02:21:14 -07:00			`par=$SEED_COUNT`
			`[ "$par" -lt 2 ] && par=2`
			`echo "Running Scenario 3: Parallel batch (PARALLEL=$par)..."`
			`_batch_remote "$AUDIT_DIR_REMOTE/parallel-run1" "$par" "AI_USE_MCTS=true AI_GPU_ROLLOUT=false"`
			`_batch_remote "$AUDIT_DIR_REMOTE/parallel-run2" "$par" "AI_USE_MCTS=true AI_GPU_ROLLOUT=false"`
feat(@projects/@magic-civilization): ✨ implement wonder-tracking ai evaluation Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 01:45:23 -07:00			`_diff_remote "$AUDIT_DIR_REMOTE/parallel-run1" "$AUDIT_DIR_REMOTE/parallel-run2" \`
			`"$AUDIT_DIR_REMOTE/parallel.diff"`
fix(@projects/@magic-civilization): 🐛 audit ai gpu pipeline for determinism Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 02:21:14 -07:00			`_fetch_remote_file "$AUDIT_DIR_REMOTE/parallel.diff" "$AUDIT_DIR_LOCAL/parallel.diff"`
feat(@projects/@magic-civilization): ✨ implement wonder-tracking ai evaluation Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 01:45:23 -07:00			`if [ -s "$AUDIT_DIR_LOCAL/parallel.diff" ]; then`
fix(@projects/@magic-civilization): 🐛 audit ai gpu pipeline for determinism Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 02:21:14 -07:00			`_record "Scenario 3 — Parallel (PARALLEL=$par)" "FAIL" \`
			`"Parallel batch diverged between runs — order-of-dispatch RNG leak? First lines:`

			\`\`\`
			`$(head -20 "$AUDIT_DIR_LOCAL/parallel.diff")`
			\`\`\`"
feat(@projects/@magic-civilization): ✨ implement wonder-tracking ai evaluation Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 01:45:23 -07:00			`else`
fix(@projects/@magic-civilization): 🐛 audit ai gpu pipeline for determinism Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 02:21:14 -07:00			`_record "Scenario 3 — Parallel (PARALLEL=$par)" "PASS" \`
			`"Parallel dispatch is deterministic across $SEED_COUNT seeds."`
feat(@projects/@magic-civilization): ✨ implement wonder-tracking ai evaluation Co-Authored-By: Lilith Autocommit <noreply@atlilith.com> 2026-04-17 01:45:23 -07:00			`fi`
			`fi`

			`# (Process-restart determinism lives in p1-09 / autosave — do not add here.)`

			`# ── Summary ──────────────────────────────────────────────────────────────────`
			`echo "" >> "$SUMMARY"`
			`echo "---" >> "$SUMMARY"`
			`if [ "${#FAILURES[@]}" -eq 0 ]; then`
			`echo "" >> "$SUMMARY"`
			`echo "**Result: ALL PASS**" >> "$SUMMARY"`
			`echo "Determinism audit PASSED — report: $SUMMARY"`
			`exit 0`
			`else`
			`echo "" >> "$SUMMARY"`
			`echo "**Result: FAIL — ${#FAILURES[@]} scenario(s): ${FAILURES[*]}**" >> "$SUMMARY"`
			`echo "Determinism audit FAILED — report: $SUMMARY" >&2`
			`exit 1`
			`fi`