magicciv/tools/b5-aggregate.sh

#!/usr/bin/env bash
# b5-aggregate.sh — Run p0-02 B5 50-game sweep: 5 clans × 10 seeds, aggregate.
#
# TODO: re-run when RUN-host environment stabilizes (see task #5 blocker thread
# — Diplomacy class_name collision prevents game compile as of 2026-04-17).
#
# Per p0-02 acceptance and the ai-verify ↔ data-dev contract:
#   - Each clan runs via AI_PIN_PERSONALITY=<id> + SEED_OFFSET=<N*10>
#   - All 50 games land under one parent dir for single-gate aggregation
#   - Disjoint seed ranges (1..10, 11..20, ...) avoid find_game_dirs() collision
#   - personality_win_balance gate must exit 0 (no clan >50%, all ≥1 win)
#
# Produces verdict JSON at .local/iter/b5-<stamp>/verdict.json with:
#   - pass: bool
#   - per_clan: {clan: {appearances, wins, win_rate_pct}}
#   - reasons: [str, ...] when pass=false
#   - supporting_metrics: captured from autoplay-report.py per-clan table
#
# Runs ON the RUN host via SSH. Results stay on RUN host; verdict JSON is the
# single artifact shipped back to EDIT host for team-lead review.
#
# Exit codes:
#   0 — gate passed, p0-02 acceptance can be cited
#   1 — gate failed, verdict.json carries the specific reason(s)
#   2 — usage / env error / sweep failure

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"

# ── Required environment ────────────────────────────────────────────────────
# Checked explicitly instead of with ${VAR:?...}: that expansion aborts with
# the shell's generic failure status, whereas this script documents exit
# code 2 for usage / env errors (1 is reserved for "gate failed").
if [[ -z "${AUTOPLAY_HOST:-}" ]]; then
    echo "ERROR: AUTOPLAY_HOST must be set (e.g. lilith@apricot.lan)" >&2
    exit 2
fi
if [[ -z "${PROJECT_ROOT_REMOTE:-}" ]]; then
    echo "ERROR: PROJECT_ROOT_REMOTE must be set (repo path on RUN host)" >&2
    exit 2
fi

# One timestamp names both the local and the remote result directory so the
# two sides of a run can be matched up afterwards.
STAMP="$(date +%Y%m%d_%H%M%S)"
LOCAL_DIR="$PROJECT_DIR/.local/iter/b5-$STAMP"
REMOTE_DIR="$PROJECT_ROOT_REMOTE/.local/iter/b5-$STAMP"

# ── Tunables (overridable from the environment) ─────────────────────────────
TURN_LIMIT="${TURN_LIMIT:-300}"
PER_CLAN_COUNT="${PER_CLAN_COUNT:-10}"
PARALLEL="${PARALLEL:-10}"

# These values are used in shell arithmetic and pasted unquoted into the
# remote command line, so reject anything that is not a plain integer.
for tunable in TURN_LIMIT PER_CLAN_COUNT PARALLEL; do
    if [[ ! "${!tunable}" =~ ^[0-9]+$ ]]; then
        echo "ERROR: $tunable must be a non-negative integer (got '${!tunable}')" >&2
        exit 2
    fi
    # Force base 10 so a leading zero (e.g. 010) is not read as octal by
    # local arithmetic while being passed through verbatim to the RUN host.
    printf -v "$tunable" '%d' "$(( 10#${!tunable} ))"
done
unset tunable

mkdir -p "$LOCAL_DIR" || {
    echo "ERROR: cannot create local results dir $LOCAL_DIR" >&2
    exit 2
}

# Clans in canonical sweep order. A clan's position in this list multiplied by
# PER_CLAN_COUNT is its seed offset, so a 10-seed-per-clan sweep produces the
# disjoint ranges 1..10 / 11..20 / 21..30 / ...
CLANS=(ironhold goldvein blackhammer deepforge runesmith)

# Run banner: identifies the run (stamp, host), the sweep shape, and where the
# results will be written on the RUN host.
clan_total=${#CLANS[@]}
banner_rule="============================================================"
printf '%s\n' \
    "$banner_rule" \
    "B5 Aggregation — $STAMP" \
    "Host: $AUTOPLAY_HOST" \
    "Per-clan: $PER_CLAN_COUNT seeds, turn_limit=$TURN_LIMIT, PARALLEL=$PARALLEL" \
    "Total games: $(( clan_total * PER_CLAN_COUNT )) across $clan_total clans" \
    "Remote results: $REMOTE_DIR" \
    "$banner_rule"

# ── Sweep each clan into the shared parent dir ──────────────────────────────
# One SSH invocation per clan. Every clan writes into the same REMOTE_DIR; the
# per-clan SEED_OFFSET (index × PER_CLAN_COUNT) keeps the seed ranges disjoint.
# Any failure aborts the whole run with exit code 2 (sweep failure).
for idx in "${!CLANS[@]}"; do
    clan="${CLANS[$idx]}"
    offset=$(( idx * PER_CLAN_COUNT ))
    sweep_log="$REMOTE_DIR/sweep_${clan}.log"
    echo ""
    echo "[$(date +%H:%M:%S)] Sweep $((idx + 1))/${#CLANS[@]}: clan=$clan seeds=$((offset + 1))..$((offset + PER_CLAN_COUNT))"
    # The remote steps are chained with && so that a failed mkdir or cd stops
    # the command instead of launching the batch from the wrong directory.
    # All batch output (stdout + stderr) is captured in the per-clan log.
    if ! ssh "$AUTOPLAY_HOST" "
        mkdir -p '$REMOTE_DIR' &&
        cd '$PROJECT_ROOT_REMOTE' &&
        AI_USE_MCTS=true \
        AI_PIN_PERSONALITY='$clan' \
        SEED_OFFSET=$offset \
        PARALLEL=$PARALLEL \
        bash tools/autoplay-batch.sh $PER_CLAN_COUNT $TURN_LIMIT '$REMOTE_DIR' \
            > '$sweep_log' 2>&1
    "; then
        echo "ERROR: sweep $clan failed — see $sweep_log on $AUTOPLAY_HOST" >&2
        # Best-effort: pull the log back for local inspection. The log may not
        # exist (e.g. the SSH connection itself failed), so errors are ignored.
        scp "$AUTOPLAY_HOST:$sweep_log" "$LOCAL_DIR/" 2>/dev/null || true
        exit 2
    fi
done

# ── Aggregate via autoplay-report (per-clan table) + gate ───────────────────
# autoplay-report.py's stdout is saved as the CSV and its stderr as the summary,
# both inside REMOTE_DIR next to the game results.
echo ""
# Derive the game count from the actual sweep shape rather than assuming 5×10.
total_games=$(( ${#CLANS[@]} * PER_CLAN_COUNT ))
echo "[$(date +%H:%M:%S)] Aggregating $total_games games — autoplay-report.py..."
# Handle failure explicitly: left to errexit, the script would exit with
# whatever status ssh returned (commonly 1), which the documented exit codes
# reserve for "gate failed". A broken aggregation is an infrastructure error.
if ! ssh "$AUTOPLAY_HOST" "
    cd '$PROJECT_ROOT_REMOTE' &&
    python3 tools/autoplay-report.py '$REMOTE_DIR' \
        > '$REMOTE_DIR/autoplay-report.csv' \
        2> '$REMOTE_DIR/autoplay-report.summary'
"; then
    echo "ERROR: autoplay-report.py failed — see $REMOTE_DIR/autoplay-report.summary on $AUTOPLAY_HOST" >&2
    # Best-effort: the summary file holds the report's stderr; it may be absent
    # if the SSH connection itself failed.
    scp "$AUTOPLAY_HOST:$REMOTE_DIR/autoplay-report.summary" "$LOCAL_DIR/" 2>/dev/null || true
    exit 2
fi

echo "[$(date +%H:%M:%S)] Running personality_win_balance gate..."
# The gate's stdout becomes verdict.json and its stderr gate.stderr, both in
# REMOTE_DIR. A non-zero exit here is an expected outcome, so the status is
# recorded with `||` rather than being allowed to trip errexit; it is
# interpreted at the end of the script.
gate_cmd="
    cd '$PROJECT_ROOT_REMOTE'
    python3 tools/checklist-report.py personality_win_balance '$REMOTE_DIR' \
        > '$REMOTE_DIR/verdict.json' \
        2> '$REMOTE_DIR/gate.stderr'
"
gate_status=0
ssh "$AUTOPLAY_HOST" "$gate_cmd" || gate_status=$?

# ── Fetch the small artifacts back ──────────────────────────────────────────
# Copies are best-effort: a file that cannot be fetched produces a warning on
# stderr but does not abort the run. scp's own diagnostics are discarded.
artifacts=(verdict.json gate.stderr autoplay-report.csv autoplay-report.summary)
for artifact in "${artifacts[@]}"; do
    if ! scp "$AUTOPLAY_HOST:$REMOTE_DIR/$artifact" "$LOCAL_DIR/" 2>/dev/null; then
        echo "WARN: could not fetch $artifact" >&2
    fi
done

# ── Final report + exit code ────────────────────────────────────────────────
echo ""
echo "============================================================"
echo "Gate exit: $gate_status"
echo "Local verdict: $LOCAL_DIR/verdict.json"
echo "============================================================"

# Map the gate's status onto the documented exit codes:
#   0   → gate passed                    → exit 0
#   255 → ssh's own error status, i.e. the gate most likely never ran (or never
#         reported back), so this is an environment error, not a gate verdict
#                                        → exit 2
#   any other non-zero → gate failed     → exit 1
case "$gate_status" in
    0)
        echo "B5 PASS — p0-02 acceptance citable from this run."
        exit 0
        ;;
    255)
        echo "ERROR: ssh to $AUTOPLAY_HOST exited 255 (connection/transport error) — gate result unknown." >&2
        exit 2
        ;;
    *)
        echo "B5 FAIL — see $LOCAL_DIR/verdict.json for reasons." >&2
        exit 1
        ;;
esac