#!/usr/bin/env bash
# b5-aggregate.sh — Run p0-02 B5 50-game sweep: 5 clans × 10 seeds, aggregate.
#
# TODO: re-run when RUN-host environment stabilizes (see task #5 blocker thread
# — Diplomacy class_name collision prevents game compile as of 2026-04-17).
#
# Per p0-02 acceptance and the ai-verify ↔ data-dev contract:
#   - Each clan runs via AI_PIN_PERSONALITY=<clan> + SEED_OFFSET=<offset>
#   - All 50 games land under one parent dir for single-gate aggregation
#   - Disjoint seed ranges (1..10, 11..20, ...) avoid find_game_dirs() collision
#   - personality_win_balance gate must exit 0 (no clan >50%, all ≥1 win)
#
# Produces verdict JSON at .local/iter/b5-<STAMP>/verdict.json with:
#   - pass: bool
#   - per_clan: {clan: {appearances, wins, win_rate_pct}}
#   - reasons: [str, ...] when pass=false
#   - supporting_metrics: captured from autoplay-report.py per-clan table
#
# Runs ON the RUN host via SSH. Results stay on RUN host; verdict JSON is the
# single artifact shipped back to EDIT host for team-lead review.
#
# Environment:
#   AUTOPLAY_HOST        (required) ssh target of the RUN host
#   PROJECT_ROOT_REMOTE  (required) repo path on the RUN host
#   TURN_LIMIT           (optional, default 300)
#   PER_CLAN_COUNT       (optional, default 10) seeds per clan
#   PARALLEL             (optional, default 10) forwarded to autoplay-batch.sh
#
# Exit codes:
#   0 — gate passed, p0-02 acceptance can be cited
#   1 — gate failed, verdict.json carries the specific reason(s)
#   2 — usage / env error / sweep failure

set -euo pipefail

# Print an error to stderr and exit with the documented "usage / env error /
# sweep failure" status, so infrastructure problems are never mistaken for a
# gate verdict (exit 1).
die() {
  echo "ERROR: $*" >&2
  exit 2
}

# Explicit checks instead of ${VAR:?msg}: the latter aborts with bash's own
# failure status rather than the documented exit code 2.
[[ -n "${AUTOPLAY_HOST:-}" ]] \
  || die "AUTOPLAY_HOST must be set (e.g. lilith@apricot.lan)"
[[ -n "${PROJECT_ROOT_REMOTE:-}" ]] \
  || die "PROJECT_ROOT_REMOTE must be set (repo path on RUN host)"
# The remote command strings below wrap this path in single quotes, so a quote
# inside it would break out of the quoting on the RUN host.
[[ "$PROJECT_ROOT_REMOTE" != *"'"* ]] \
  || die "PROJECT_ROOT_REMOTE must not contain a single quote"

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"

STAMP="$(date +%Y%m%d_%H%M%S)"
LOCAL_DIR="$PROJECT_DIR/.local/iter/b5-$STAMP"
REMOTE_DIR="$PROJECT_ROOT_REMOTE/.local/iter/b5-$STAMP"
TURN_LIMIT="${TURN_LIMIT:-300}"
PER_CLAN_COUNT="${PER_CLAN_COUNT:-10}"
PARALLEL="${PARALLEL:-10}"

# These values feed shell arithmetic and are interpolated unquoted into the
# remote command line, so accept plain decimal integers only (no leading zeros,
# which $(( )) would read as octal).
for knob in TURN_LIMIT PER_CLAN_COUNT PARALLEL; do
  [[ "${!knob}" =~ ^(0|[1-9][0-9]*)$ ]] \
    || die "$knob must be a non-negative integer (got '${!knob}')"
done

mkdir -p "$LOCAL_DIR" || die "cannot create $LOCAL_DIR"

# Canonical clan order. Seed offsets are computed as (index × PER_CLAN_COUNT) so
# a 10-seed-per-clan sweep yields disjoint ranges 1..10 / 11..20 / 21..30 / ...
CLANS=(ironhold goldvein blackhammer deepforge runesmith)
TOTAL_GAMES=$(( ${#CLANS[@]} * PER_CLAN_COUNT ))

echo "============================================================"
echo "B5 Aggregation — $STAMP"
echo "Host: $AUTOPLAY_HOST"
echo "Per-clan: $PER_CLAN_COUNT seeds, turn_limit=$TURN_LIMIT, PARALLEL=$PARALLEL"
echo "Total games: $TOTAL_GAMES across ${#CLANS[@]} clans"
echo "Remote results: $REMOTE_DIR"
echo "============================================================"

# ── Sweep each clan into the shared parent dir ──────────────────────────────
for idx in "${!CLANS[@]}"; do
  clan="${CLANS[$idx]}"
  offset=$(( idx * PER_CLAN_COUNT ))
  sweep_log="$REMOTE_DIR/sweep_${clan}.log"

  echo ""
  echo "[$(date +%H:%M:%S)] Sweep $((idx + 1))/${#CLANS[@]}: clan=$clan seeds=$((offset + 1))..$((offset + PER_CLAN_COUNT))"

  # The remote steps are chained with && so a failed mkdir/cd stops the sweep
  # instead of launching the batch from the wrong directory. The trailing
  # backslashes are consumed locally (inside double quotes), so the RUN host
  # receives the env assignments and the bash invocation as a single command.
  ssh "$AUTOPLAY_HOST" "
    mkdir -p '$REMOTE_DIR' &&
    cd '$PROJECT_ROOT_REMOTE' &&
    AI_USE_MCTS=true \
    AI_PIN_PERSONALITY='$clan' \
    SEED_OFFSET=$offset \
    PARALLEL=$PARALLEL \
    bash tools/autoplay-batch.sh $PER_CLAN_COUNT $TURN_LIMIT '$REMOTE_DIR' \
        > '$sweep_log' 2>&1
  " || {
    echo "ERROR: sweep $clan failed — see $sweep_log on $AUTOPLAY_HOST" >&2
    # Best effort: pull the log back for local inspection; it may not exist.
    scp "$AUTOPLAY_HOST:$sweep_log" "$LOCAL_DIR/" 2>/dev/null || true
    exit 2
  }
done

# ── Aggregate via autoplay-report (per-clan table) + gate ───────────────────
echo ""
echo "[$(date +%H:%M:%S)] Aggregating $TOTAL_GAMES games — autoplay-report.py..."
# stdout becomes the CSV, stderr the summary. A failure here is an
# infrastructure problem, so it maps to exit 2 rather than leaking the raw
# ssh/python status (which could be 1 and read as "gate failed").
ssh "$AUTOPLAY_HOST" "
  cd '$PROJECT_ROOT_REMOTE' &&
  python3 tools/autoplay-report.py '$REMOTE_DIR' \
      > '$REMOTE_DIR/autoplay-report.csv' \
      2> '$REMOTE_DIR/autoplay-report.summary'
" || {
  echo "ERROR: autoplay-report.py failed — see $REMOTE_DIR/autoplay-report.summary on $AUTOPLAY_HOST" >&2
  # Best effort: the summary file holds the report's stderr.
  scp "$AUTOPLAY_HOST:$REMOTE_DIR/autoplay-report.summary" "$LOCAL_DIR/" 2>/dev/null || true
  exit 2
}

echo "[$(date +%H:%M:%S)] Running personality_win_balance gate..."
# Capture the gate's status without tripping set -e. A failed remote cd is
# reported as 255 (the status ssh itself uses for its own errors) so it is
# classified below as an environment error rather than a gate verdict.
gate_status=0
ssh "$AUTOPLAY_HOST" "
  cd '$PROJECT_ROOT_REMOTE' || exit 255
  python3 tools/checklist-report.py personality_win_balance '$REMOTE_DIR' \
      > '$REMOTE_DIR/verdict.json' \
      2> '$REMOTE_DIR/gate.stderr'
" || gate_status=$?

# ── Fetch the small artifacts back ──────────────────────────────────────────
# Best effort by design: a missing artifact is reported but does not change
# the exit code.
for artifact in verdict.json gate.stderr autoplay-report.csv autoplay-report.summary; do
  scp "$AUTOPLAY_HOST:$REMOTE_DIR/$artifact" "$LOCAL_DIR/" 2>/dev/null \
    || echo "WARN: could not fetch $artifact" >&2
done

echo ""
echo "============================================================"
echo "Gate exit: $gate_status"
echo "Local verdict: $LOCAL_DIR/verdict.json"
echo "============================================================"

case "$gate_status" in
  0)
    echo "B5 PASS — p0-02 acceptance citable from this run."
    exit 0
    ;;
  126 | 127 | 255)
    # 255: ssh could not connect / remote cd failed; 126/127: the remote shell
    # could not execute or find the command. None of these is a gate verdict.
    die "gate could not be run (status $gate_status) — see $REMOTE_DIR/gate.stderr on $AUTOPLAY_HOST"
    ;;
  *)
    echo "B5 FAIL — see $LOCAL_DIR/verdict.json for reasons." >&2
    exit 1
    ;;
esac