#!/usr/bin/env bash
# b5-aggregate.sh — Run p0-02 B5 50-game sweep: 5 clans × 10 seeds, aggregate.
#
# TODO: re-run when RUN-host environment stabilizes (see task #5 blocker thread
# — Diplomacy class_name collision prevents game compile as of 2026-04-17).
#
# Per p0-02 acceptance and the ai-verify ↔ data-dev contract:
#   - Each clan runs via AI_PIN_PERSONALITY=<id> + SEED_OFFSET=<N*10>
#     (N is the clan's 0-based index; 10 is the default PER_CLAN_COUNT)
#   - All 50 games land under one parent dir for single-gate aggregation
#   - Disjoint seed ranges (1..10, 11..20, ...) avoid find_game_dirs() collision
#   - personality_win_balance gate must exit 0 (no clan >50%, all ≥1 win)
#
# Produces verdict JSON at .local/iter/b5-<stamp>/verdict.json with:
#   - pass: bool
#   - per_clan: {clan: {appearances, wins, win_rate_pct}}
#   - reasons: [str, ...] when pass=false
#   - supporting_metrics: captured from autoplay-report.py per-clan table
#
# Runs ON the RUN host via SSH. Results stay on RUN host; verdict JSON is the
# single artifact shipped back to EDIT host for team-lead review.
#
# Exit codes:
#   0 — gate passed, p0-02 acceptance can be cited
#   1 — gate failed, verdict.json carries the specific reason(s)
#   2 — usage / env error / sweep failure

set -euo pipefail

# Report a usage/environment problem and exit with status 2, the code this
# script reserves for "usage / env error". Bash's ${VAR:?msg} form is avoided
# here because it exits with status 1, which this script uses for "gate failed".
env_error() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 2
}

# Require that the variable named $1 (current value $2) is a plain decimal,
# non-negative integer. These values are later used in shell arithmetic and
# spliced unquoted into the remote command line, so anything else is rejected.
require_uint() {
  local name=$1 value=$2
  [[ "$value" =~ ^(0|[1-9][0-9]*)$ ]] \
    || env_error "$name must be a non-negative integer (got '$value')"
}

# Directory containing this script, and its parent (used as the project root).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"

# Required environment: SSH target and the repo checkout path on that host.
[[ -n "${AUTOPLAY_HOST:-}" ]] \
  || env_error "AUTOPLAY_HOST must be set (e.g. lilith@apricot.lan)"
[[ -n "${PROJECT_ROOT_REMOTE:-}" ]] \
  || env_error "PROJECT_ROOT_REMOTE must be set (repo path on RUN host)"
# The remote path is embedded inside single quotes in the remote commands, so
# a literal single quote in it would break that quoting.
[[ "$PROJECT_ROOT_REMOTE" != *"'"* ]] \
  || env_error "PROJECT_ROOT_REMOTE must not contain a single quote"

# One timestamp names both the local and the remote result directory.
STAMP="$(date +%Y%m%d_%H%M%S)"
LOCAL_DIR="$PROJECT_DIR/.local/iter/b5-$STAMP"
REMOTE_DIR="$PROJECT_ROOT_REMOTE/.local/iter/b5-$STAMP"

# Tunables, overridable from the environment.
TURN_LIMIT="${TURN_LIMIT:-300}"
PER_CLAN_COUNT="${PER_CLAN_COUNT:-10}"
PARALLEL="${PARALLEL:-10}"
require_uint TURN_LIMIT "$TURN_LIMIT"
require_uint PER_CLAN_COUNT "$PER_CLAN_COUNT"
require_uint PARALLEL "$PARALLEL"

mkdir -p "$LOCAL_DIR" || env_error "cannot create local results dir $LOCAL_DIR"

# Clans in canonical order. A clan's position in this list determines its seed
# offset (index × PER_CLAN_COUNT), so with 10 seeds per clan the ranges are
# the disjoint blocks 1..10 / 11..20 / 21..30 / ...
declare -a CLANS=(
  ironhold
  goldvein
  blackhammer
  deepforge
  runesmith
)

# Run banner: timestamp, SSH target, sweep parameters, total game count and
# the remote directory that will receive the results.
banner_rule="============================================================"
printf '%s\n' \
  "$banner_rule" \
  "B5 Aggregation — $STAMP" \
  "Host: $AUTOPLAY_HOST" \
  "Per-clan: $PER_CLAN_COUNT seeds, turn_limit=$TURN_LIMIT, PARALLEL=$PARALLEL" \
  "Total games: $(( ${#CLANS[@]} * PER_CLAN_COUNT )) across ${#CLANS[@]} clans" \
  "Remote results: $REMOTE_DIR" \
  "$banner_rule"

# ── Sweep each clan into the shared parent dir ──────────────────────────────
# For every clan, run tools/autoplay-batch.sh on the RUN host with that clan
# pinned and a seed offset of (clan index × PER_CLAN_COUNT). All sweeps write
# into the same $REMOTE_DIR; each sweep's output goes to sweep_<clan>.log there.
# Any sweep failure aborts the whole run with exit status 2.
for idx in "${!CLANS[@]}"; do
  clan=${CLANS[idx]}
  offset=$(( idx * PER_CLAN_COUNT ))
  echo ""
  echo "[$(date +%H:%M:%S)] Sweep $((idx + 1))/${#CLANS[@]}: clan=$clan seeds=$((offset + 1))..$((offset + PER_CLAN_COUNT))"
  # The command string is expanded locally before it is sent. The remote
  # `set -e` makes a failed mkdir/cd abort the remote script instead of
  # letting the batch run from whatever directory the login shell starts in.
  ssh "$AUTOPLAY_HOST" "
    set -e
    mkdir -p '$REMOTE_DIR'
    cd '$PROJECT_ROOT_REMOTE'
    AI_USE_MCTS=true \
    AI_PIN_PERSONALITY='$clan' \
    SEED_OFFSET=$offset \
    PARALLEL=$PARALLEL \
    bash tools/autoplay-batch.sh $PER_CLAN_COUNT $TURN_LIMIT '$REMOTE_DIR' \
      > '$REMOTE_DIR/sweep_${clan}.log' 2>&1
  " || {
    echo "ERROR: sweep $clan failed — see $REMOTE_DIR/sweep_${clan}.log on $AUTOPLAY_HOST" >&2
    # Best-effort: pull the log back for inspection; it may not exist.
    scp "$AUTOPLAY_HOST:$REMOTE_DIR/sweep_${clan}.log" "$LOCAL_DIR/" 2>/dev/null || true
    exit 2
  }
done

# ── Aggregate via autoplay-report (per-clan table) + gate ───────────────────
# Run tools/autoplay-report.py over the shared results dir on the RUN host.
# Its stdout is stored as autoplay-report.csv and its stderr as
# autoplay-report.summary, both inside $REMOTE_DIR.
echo ""
# Derive the game count from the actual sweep size rather than assuming 50,
# since PER_CLAN_COUNT can be overridden from the environment.
total_games=$(( ${#CLANS[@]} * PER_CLAN_COUNT ))
echo "[$(date +%H:%M:%S)] Aggregating $total_games games — autoplay-report.py..."
ssh "$AUTOPLAY_HOST" "
  set -e
  cd '$PROJECT_ROOT_REMOTE'
  python3 tools/autoplay-report.py '$REMOTE_DIR' \
    > '$REMOTE_DIR/autoplay-report.csv' \
    2> '$REMOTE_DIR/autoplay-report.summary'
" || {
  # A failed aggregation is an infrastructure failure, not a gate verdict, so
  # exit 2 rather than letting errexit leak ssh's raw status (often 1).
  echo "ERROR: autoplay-report.py failed — see $REMOTE_DIR/autoplay-report.summary on $AUTOPLAY_HOST" >&2
  # Best-effort: pull the stderr capture back for inspection; it may not exist.
  scp "$AUTOPLAY_HOST:$REMOTE_DIR/autoplay-report.summary" "$LOCAL_DIR/" 2>/dev/null || true
  exit 2
}

# Run the personality_win_balance gate on the RUN host. The gate's stdout is
# stored as verdict.json and its stderr as gate.stderr inside $REMOTE_DIR.
# A non-zero status must not abort the script here: it is recorded in
# gate_status and interpreted after the artifacts have been fetched.
echo "[$(date +%H:%M:%S)] Running personality_win_balance gate..."
gate_cmd="
  cd '$PROJECT_ROOT_REMOTE'
  python3 tools/checklist-report.py personality_win_balance '$REMOTE_DIR' \
    > '$REMOTE_DIR/verdict.json' \
    2> '$REMOTE_DIR/gate.stderr'
"
gate_status=0
ssh "$AUTOPLAY_HOST" "$gate_cmd" || gate_status=$?

# ── Fetch the small artifacts back ──────────────────────────────────────────
# Copy the verdict, the gate's stderr and the report outputs from the RUN host
# into $LOCAL_DIR. Each copy is best-effort: scp's own error output is
# discarded and a failed copy only produces a warning on stderr.
artifacts=(verdict.json gate.stderr autoplay-report.csv autoplay-report.summary)
for artifact in "${artifacts[@]}"; do
  if ! scp "$AUTOPLAY_HOST:$REMOTE_DIR/$artifact" "$LOCAL_DIR/" 2>/dev/null; then
    echo "WARN: could not fetch $artifact" >&2
  fi
done

# Final summary: print the gate's exit status and the local verdict path, then
# map the gate status onto this script's exit codes:
#   0   → exit 0 (gate passed)
#   255 → exit 2 (ssh reports its own failures, e.g. connection errors, as 255,
#                 so no gate verdict was produced)
#   any other value → exit 1 (gate failed)
echo ""
echo "============================================================"
echo "Gate exit: $gate_status"
echo "Local verdict: $LOCAL_DIR/verdict.json"
echo "============================================================"

case "$gate_status" in
  0)
    echo "B5 PASS — p0-02 acceptance citable from this run."
    exit 0
    ;;
  255)
    echo "ERROR: ssh to $AUTOPLAY_HOST failed while running the gate (status 255); no verdict was produced." >&2
    exit 2
    ;;
  *)
    echo "B5 FAIL — see $LOCAL_DIR/verdict.json for reasons." >&2
    exit 1
    ;;
esac