magicciv/tools/b5-aggregate.sh

120 lines
4.7 KiB
Bash
Raw Normal View History

#!/usr/bin/env bash
# b5-aggregate.sh — Run p0-02 B5 50-game sweep: 5 clans × 10 seeds, aggregate.
#
# TODO: re-run when RUN-host environment stabilizes (see task #5 blocker thread
# — Diplomacy class_name collision prevents game compile as of 2026-04-17).
#
# Per p0-02 acceptance and the ai-verify ↔ data-dev contract:
# - Each clan runs via AI_PIN_PERSONALITY=<id> + SEED_OFFSET=<clan_index × PER_CLAN_COUNT>
#   (clan_index is 0-based; with the default 10 seeds per clan this is N*10)
# - All 50 games land under one parent dir for single-gate aggregation
# - Disjoint seed ranges (1..10, 11..20, ...) avoid find_game_dirs() collision
# - personality_win_balance gate must exit 0 (no clan >50%, all ≥1 win)
#
# Produces verdict JSON at .local/iter/b5-<stamp>/verdict.json with:
# - pass: bool
# - per_clan: {clan: {appearances, wins, win_rate_pct}}
# - reasons: [str, ...] when pass=false
# - supporting_metrics: captured from autoplay-report.py per-clan table
#
# Runs ON the RUN host via SSH. Results stay on RUN host; verdict JSON is the
# single artifact shipped back to EDIT host for team-lead review.
#
# Exit codes:
# 0 — gate passed, p0-02 acceptance can be cited
# 1 — gate failed, verdict.json carries the specific reason(s)
# 2 — usage / env error / sweep failure
# Strict mode: abort on command failure, unset variables, and failed pipeline stages.
set -euo pipefail

# Directory containing this script, and its parent (used as the local project root).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"

# Required environment. Checked explicitly (rather than with ${VAR:?}) so that a
# missing value exits 2 — the documented "usage / env error" code — instead of
# bash's default status 1, which this script reserves for "gate failed".
if [[ -z "${AUTOPLAY_HOST:-}" ]]; then
  echo "ERROR: AUTOPLAY_HOST must be set (e.g. lilith@apricot.local)" >&2
  exit 2
fi
if [[ -z "${PROJECT_ROOT_REMOTE:-}" ]]; then
  echo "ERROR: PROJECT_ROOT_REMOTE must be set (repo path on RUN host)" >&2
  exit 2
fi

# One timestamp names both the local and the remote results directory.
STAMP="$(date +%Y%m%d_%H%M%S)"
LOCAL_DIR="$PROJECT_DIR/.local/iter/b5-$STAMP"
REMOTE_DIR="$PROJECT_ROOT_REMOTE/.local/iter/b5-$STAMP"

# Tunables, overridable from the environment.
TURN_LIMIT="${TURN_LIMIT:-300}"
PER_CLAN_COUNT="${PER_CLAN_COUNT:-10}"
PARALLEL="${PARALLEL:-10}"

# These values are interpolated unquoted into the remote shell command and used
# in arithmetic, so reject anything that is not a plain positive decimal integer.
for knob in TURN_LIMIT PER_CLAN_COUNT PARALLEL; do
  if [[ ! "${!knob}" =~ ^[1-9][0-9]*$ ]]; then
    echo "ERROR: $knob must be a positive integer (got '${!knob}')" >&2
    exit 2
  fi
done

if ! mkdir -p "$LOCAL_DIR"; then
  echo "ERROR: cannot create local results dir $LOCAL_DIR" >&2
  exit 2
fi

# Canonical clan order. Seed offsets are computed as (index × PER_CLAN_COUNT) so
# a 10-seed-per-clan sweep yields disjoint ranges 1..10 / 11..20 / 21..30 / ...
CLANS=(ironhold goldvein blackhammer deepforge runesmith)

# Run banner: echoes the effective configuration before any remote work starts.
echo "============================================================"
echo "B5 Aggregation — $STAMP"
echo "Host: $AUTOPLAY_HOST"
echo "Per-clan: $PER_CLAN_COUNT seeds, turn_limit=$TURN_LIMIT, PARALLEL=$PARALLEL"
echo "Total games: $(( ${#CLANS[@]} * PER_CLAN_COUNT )) across ${#CLANS[@]} clans"
echo "Remote results: $REMOTE_DIR"
echo "============================================================"
# ── Sweep each clan into the shared parent dir ──────────────────────────────
# Runs one autoplay batch per clan on the remote host, sequentially. Each clan's
# 0-based position in CLANS times PER_CLAN_COUNT is its seed offset, so the seed
# ranges of different clans do not overlap. Any failed sweep aborts with exit 2.
for idx in "${!CLANS[@]}"; do
  clan="${CLANS[idx]}"
  offset=$(( idx * PER_CLAN_COUNT ))
  sweep_log="$REMOTE_DIR/sweep_${clan}.log"

  echo ""
  echo "[$(date +%H:%M:%S)] Sweep $((idx + 1))/${#CLANS[@]}: clan=$clan seeds=$((offset + 1))..$((offset + PER_CLAN_COUNT))"

  # The remote steps are chained with && so a failed mkdir or cd stops the
  # sequence instead of launching the batch from the wrong directory.
  # Backslash-newlines are consumed locally (inside double quotes), so the env
  # assignments and the batch invocation reach the remote shell as one command.
  if ! ssh "$AUTOPLAY_HOST" "
    mkdir -p '$REMOTE_DIR' &&
    cd '$PROJECT_ROOT_REMOTE' &&
    AI_USE_MCTS=true \
    AI_PIN_PERSONALITY='$clan' \
    SEED_OFFSET=$offset \
    PARALLEL=$PARALLEL \
    bash tools/autoplay-batch.sh $PER_CLAN_COUNT $TURN_LIMIT '$REMOTE_DIR' \
      > '$sweep_log' 2>&1
  "; then
    echo "ERROR: sweep $clan failed — see $sweep_log on $AUTOPLAY_HOST" >&2
    # Best-effort: pull the log back for local inspection; ignore copy failures
    # because the host may be unreachable or the log may never have been created.
    scp "$AUTOPLAY_HOST:$sweep_log" "$LOCAL_DIR/" 2>/dev/null || true
    exit 2
  fi
done
# ── Aggregate via autoplay-report (per-clan table) + gate ───────────────────
# Runs autoplay-report.py over the shared results dir on the remote host; its
# stdout is saved as autoplay-report.csv and its stderr as
# autoplay-report.summary, both inside the remote results dir.
total_games=$(( ${#CLANS[@]} * PER_CLAN_COUNT ))  # actual sweep size, not a fixed 50
echo ""
echo "[$(date +%H:%M:%S)] Aggregating $total_games games — autoplay-report.py..."
# A failure here is an infrastructure problem, not a gate verdict: exit 2
# explicitly so it is never confused with exit 1 ("gate failed").
if ! ssh "$AUTOPLAY_HOST" "
  cd '$PROJECT_ROOT_REMOTE' &&
  python3 tools/autoplay-report.py '$REMOTE_DIR' \
    > '$REMOTE_DIR/autoplay-report.csv' \
    2> '$REMOTE_DIR/autoplay-report.summary'
"; then
  echo "ERROR: autoplay-report.py failed — see $REMOTE_DIR/autoplay-report.summary on $AUTOPLAY_HOST" >&2
  # Best-effort: bring the captured stderr back for local inspection.
  scp "$AUTOPLAY_HOST:$REMOTE_DIR/autoplay-report.summary" "$LOCAL_DIR/" 2>/dev/null || true
  exit 2
fi
echo "[$(date +%H:%M:%S)] Running personality_win_balance gate..."
# Runs the gate remotely; its stdout becomes verdict.json and its stderr
# gate.stderr in the remote results dir. A non-zero exit is an expected outcome
# (gate failed), so the status is captured with `||` instead of letting
# `set -e` abort the script.
gate_status=0
ssh "$AUTOPLAY_HOST" "
  cd '$PROJECT_ROOT_REMOTE' &&
  python3 tools/checklist-report.py personality_win_balance '$REMOTE_DIR' \
    > '$REMOTE_DIR/verdict.json' \
    2> '$REMOTE_DIR/gate.stderr'
" || gate_status=$?

# ssh reports its own failures (connection refused, auth failure, ...) as 255.
# That is an environment error, not a gate verdict, so bail out with exit 2
# rather than letting it be reported as "B5 FAIL".
# NOTE(review): assumes checklist-report.py itself never exits 255 — confirm.
if [ "$gate_status" -eq 255 ]; then
  echo "ERROR: ssh to $AUTOPLAY_HOST failed while running the gate (exit 255)" >&2
  exit 2
fi
# ── Fetch the small artifacts back ──────────────────────────────────────────
# Best-effort copy of each result file from the remote results dir into
# LOCAL_DIR. scp's own stderr is discarded; a failed copy only produces a
# warning and does not stop the script.
artifacts=(verdict.json gate.stderr autoplay-report.csv autoplay-report.summary)
for artifact in "${artifacts[@]}"; do
  if ! scp "$AUTOPLAY_HOST:$REMOTE_DIR/$artifact" "$LOCAL_DIR/" 2>/dev/null; then
    echo "WARN: could not fetch $artifact" >&2
  fi
done
# Final summary: print the gate's exit status and where the local verdict
# lives, then map the gate status onto this script's exit code
# (0 = gate passed, 1 = gate failed).
rule="============================================================"
printf '\n'
printf '%s\n' "$rule"
printf '%s\n' "Gate exit: $gate_status"
printf '%s\n' "Local verdict: $LOCAL_DIR/verdict.json"
printf '%s\n' "$rule"

# Guard clause: anything other than a clean 0 from the gate is a failure.
if ! [ "$gate_status" -eq 0 ]; then
  printf '%s\n' "B5 FAIL — see $LOCAL_DIR/verdict.json for reasons." >&2
  exit 1
fi

printf '%s\n' "B5 PASS — p0-02 acceptance citable from this run."
exit 0