Adds scripts/green-pass.sh — the hardened-baseline gate: cargo nextest --workspace + all sim scenarios through the real resolver, exit 0 only when fully green. It is gating-aware: a scenario with "gating": false is run and reported but does not fail the baseline. Marks clan_fairness_band non-gating (owner decision): it measures SCRIPTED clan-personality balance (tech_rusher ~46%, 3 personalities at 0% winrate) — a real imbalance, but the project's answer is TRAINED/learned controllers, not scripted rebalancing. The 0.4 ceiling is left untuned so the gap stays visible. Fix path: train learned controllers toward the 6 clan types (docs/ai-roadmap.md). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
70 lines
3.6 KiB
Bash
Executable file
70 lines
3.6 KiB
Bash
Executable file
#!/usr/bin/env bash
# Hardened-baseline gate: the full horizontal green pass for the simulator.
#
#   1. cargo nextest run --workspace — every crate's unit + integration tests
#   2. all sim scenarios — combat set-pieces + fullgame invariants,
#      run through the REAL sim_scenario resolver
#
# Gating: a scenario JSON with "gating": false is run and REPORTED but does not
# fail the gate (e.g. clan_fairness_band measures scripted-personality balance,
# which is intentionally superseded by trained/learned controllers — see
# docs/ai-roadmap.md). Every other scenario, and any workspace test failure,
# fails the gate.
#
# Designed to run ON a fleet worker (no local Rust toolchain on plum):
#   ./run dist:up 1 && rsync this repo state up && ssh worker 'bash scripts/green-pass.sh'
# Exit 0 only when the baseline is fully green.

# Shell options: unset variables are errors, and a pipeline fails if any stage
# fails. errexit (-e) is not enabled; later steps record exit codes (for
# example TESTS_RC) and carry on so the final summary can report them.
set -o nounset -o pipefail

# Repository root; override with MC_REPO.
REPO="${MC_REPO:-$HOME/Code/@projects/@magic-civilization}"
if ! cd "$REPO"; then
  echo "no repo at $REPO" >&2
  exit 2
fi

# Load the cargo environment file when it exists; its absence is not an error.
source ~/.cargo/env 2>/dev/null || true

# Scenario directory, relative to $REPO (the working directory after the cd).
SCN_DIR="public/games/age-of-dwarves/data/sim-scenarios"

# Where logs and per-scenario outputs go; override with MC_GREENPASS_OUT.
OUT="${MC_GREENPASS_OUT:-/tmp/green-pass}"

# Start every run from an empty output directory.
rm -rf "$OUT"
mkdir -p "$OUT"

echo "HEAD: $(git log --oneline -1 2>/dev/null)"

# ── [1/3] workspace tests ────────────────────────────────────────────────────
# Run the whole workspace test suite from src/simulator with --no-fail-fast,
# sending all output to $OUT/tests.log. The exit status is kept in TESTS_RC
# for the final verdict; only the last four log lines are echoed here.
printf '%s\n' "########## [1/3] cargo nextest run --workspace ##########"
TESTS_RC=0
(
  cd "$REPO/src/simulator" \
    && cargo nextest run --workspace --no-fail-fast
) > "$OUT/tests.log" 2>&1 || TESTS_RC=$?
tail -n 4 "$OUT/tests.log"
printf 'tests rc=%s\n' "$TESTS_RC"

# ── [2/3] build the scenario runner ──────────────────────────────────────────

# build_sim_scenario
#   Builds the sim_scenario release binary and publishes its path in SIMBIN.
#   Globals:  REPO (read), OUT (read), SIMBIN (written)
#   Outputs:  build output goes to $OUT/simbuild.log; on failure a short
#             diagnostic plus the log tail is printed to stdout
#   Returns:  always 0 — failure is signalled by leaving SIMBIN empty, which
#             the scenario step treats as "not built" and fails the gate.
build_sim_scenario() {
  local build_rc=0
  SIMBIN=""

  ( cd "$REPO/src/simulator" \
      && cargo build --release -p mc-sim --bin sim_scenario
  ) > "$OUT/simbuild.log" 2>&1 || build_rc=$?

  # A failed build must not fall through to the search below: it could pick up
  # a stale binary left by an earlier build, and the gate would then validate
  # out-of-date code.
  if [ "$build_rc" -ne 0 ]; then
    echo "sim_scenario build FAILED rc=$build_rc (see $OUT/simbuild.log)"
    tail -n 4 "$OUT/simbuild.log" 2>/dev/null
    return 0
  fi

  # Locate the freshly built binary: the first file named sim_scenario under
  # $REPO whose path contains "release".
  SIMBIN="$(find "$REPO" -type f -name sim_scenario -path '*release*' 2>/dev/null | head -1)"
}

echo "########## [2/3] build sim_scenario ##########"
build_sim_scenario
echo "sim_scenario: ${SIMBIN:-NOT FOUND}"

# ── [3/3] run scenarios (gating-aware) ───────────────────────────────────────
echo "########## [3/3] scenarios ##########"

# scenario_gating_flag FILE
#   Prints the scenario's top-level "gating" value as Python renders it
#   ("True" / "False"), defaulting to True when the key is absent. Prints
#   nothing when python3 is unavailable or the file cannot be parsed; callers
#   compare against "False" only, so any such failure is treated as gating.
#   The path is handed over as an argument instead of being spliced into the
#   Python source, so quotes or backslashes in it cannot break the snippet.
scenario_gating_flag() {
  python3 -c '
import json, sys
with open(sys.argv[1]) as fh:
    print(json.load(fh).get("gating", True))
' "$1" 2>/dev/null
}

# Counters and name lists consumed by the summary step.
gate_pass=0; gate_fail=0; nongate_fail=0; failed_gating=""; failed_nongating=""
scn_seen=0
if [ -x "${SIMBIN:-}" ]; then
  for f in "$SCN_DIR"/combat/*.json "$SCN_DIR"/fullgame/*.json; do
    # An unmatched glob stays literal; skip it.
    [ -e "$f" ] || continue
    scn_seen=$((scn_seen+1))
    name="$(basename "$f" .json)"
    # The runner's exit status is the scenario verdict; stdout and stderr are
    # kept per scenario under $OUT.
    if "$SIMBIN" "$f" > "$OUT/scn_$name.json" 2>"$OUT/scn_$name.err"; then
      echo "PASS $name"; gate_pass=$((gate_pass+1))
    # The gating flag only matters for a failing scenario, so it is looked up
    # here rather than once per scenario.
    elif [ "$(scenario_gating_flag "$f")" = "False" ]; then
      echo "KNOWN $name (non-gating — does not fail the baseline)"; nongate_fail=$((nongate_fail+1)); failed_nongating="$failed_nongating $name"
    else
      echo "FAIL $name"; gate_fail=$((gate_fail+1)); failed_gating="$failed_gating $name"
    fi
  done
  # Zero scenarios means the directory is missing or empty; reporting green
  # without having run anything would defeat the gate.
  if [ "$scn_seen" -eq 0 ]; then
    echo "no scenarios found under $SCN_DIR — gate fails"; gate_fail=1
  fi
else
  echo "sim_scenario not built — gate fails"; gate_fail=1
fi

# ── summary and verdict ──────────────────────────────────────────────────────
# Report the workspace test status and scenario tallies, list any failing
# scenarios by name, then exit 0 only if the workspace tests passed and no
# gating scenario failed; otherwise exit 1.
printf '%s\n' "=================== SUMMARY ==================="
printf 'workspace tests rc=%s\n' "$TESTS_RC"
printf 'scenarios: %s gating-pass, %s gating-FAIL, %s non-gating-known-fail\n' \
  "$gate_pass" "$gate_fail" "$nongate_fail"

if [ -n "$failed_gating" ]; then
  printf 'GATING_FAILURES:%s\n' "$failed_gating"
fi
if [ -n "$failed_nongating" ]; then
  printf 'KNOWN_NONGATING:%s\n' "$failed_nongating"
fi

# Non-gating failures are reported above but do not affect the verdict.
if [ "$TESTS_RC" -eq 0 ] && [ "$gate_fail" -eq 0 ]; then
  verdict="OK"
  final_rc=0
else
  verdict="FAILED"
  final_rc=1
fi
printf 'GREEN_BASELINE: %s\n' "$verdict"
exit "$final_rc"