#!/usr/bin/env bash
# b5-aggregate.sh — Run p0-02 B5 50-game sweep: 5 clans × 10 seeds, aggregate.
#
# TODO: re-run when RUN-host environment stabilizes (see task #5 blocker thread
# — Diplomacy class_name collision prevents game compile as of 2026-04-17).
#
# Per p0-02 acceptance and the ai-verify ↔ data-dev contract:
#   - Each clan runs via AI_PIN_PERSONALITY=<id> + SEED_OFFSET=<N*10>, where N
#     is the clan's zero-based position in the CLANS list
#   - All 50 games land under one parent dir for single-gate aggregation
#   - Disjoint seed ranges (1..10, 11..20, ...) avoid find_game_dirs() collision
#   - personality_win_balance gate must exit 0 (no clan >50%, all ≥1 win)
#
# Produces verdict JSON at .local/iter/b5-<stamp>/verdict.json with:
#   - pass: bool
#   - per_clan: {clan: {appearances, wins, win_rate_pct}}
#   - reasons: [str, ...] when pass=false
#   - supporting_metrics: captured from autoplay-report.py per-clan table
#
# The sweeps, the report, and the gate all run ON the RUN host via SSH. Game
# results stay on the RUN host; verdict.json is the artifact shipped back to
# the EDIT host for team-lead review (gate.stderr and the autoplay-report
# outputs are fetched alongside it for context).
#
# Exit codes:
#   0 — gate passed, p0-02 acceptance can be cited
#   1 — gate failed, verdict.json carries the specific reason(s)
#   2 — usage / env error / sweep failure

# Strict mode: abort on unhandled command failures, on use of unset variables,
# and when any stage of a pipeline fails.
set -euo pipefail

# Absolute directory that contains this script, and its parent directory.
# PROJECT_DIR is the base for the local output path (.local/iter/...).
# `--` keeps a path that starts with '-' from being parsed as an option.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname -- "$SCRIPT_DIR")"

#######################################
# Abort with exit status 2 when a required environment variable is unset or
# empty. A bare "${VAR:?msg}" expansion would also abort, but with the shell's
# generic failure status (1 in bash), which this script reserves for
# "gate failed"; env errors are documented as exit 2.
# Arguments: $1 - name of the variable to check
#            $2 - short hint appended to the error message
# Outputs:   error message on stderr when the variable is missing
#######################################
require_env() {
  local name=$1 hint=$2
  if [[ -z "${!name:-}" ]]; then
    printf '%s must be set (%s)\n' "$name" "$hint" >&2
    exit 2
  fi
}

# SSH destination of the RUN host, and the repository path on that host.
require_env AUTOPLAY_HOST 'e.g. lilith@apricot.local'
require_env PROJECT_ROOT_REMOTE 'repo path on RUN host'

# One timestamp (YYYYmmdd_HHMMSS) names this run. The same b5-<stamp> leaf is
# used under the local project dir and under the remote repo, so the local
# copy of the artifacts and the remote results are easy to match up.
STAMP="$(date +%Y%m%d_%H%M%S)"
LOCAL_DIR="$PROJECT_DIR/.local/iter/b5-$STAMP"
REMOTE_DIR="$PROJECT_ROOT_REMOTE/.local/iter/b5-$STAMP"

# Sweep tunables, each overridable from the environment:
#   TURN_LIMIT     - second argument handed to tools/autoplay-batch.sh
#   PER_CLAN_COUNT - games per clan (first argument to autoplay-batch.sh) and
#                    the stride between per-clan seed offsets
#   PARALLEL       - exported as PARALLEL to the remote batch run
TURN_LIMIT="${TURN_LIMIT:-300}"
PER_CLAN_COUNT="${PER_CLAN_COUNT:-10}"
PARALLEL="${PARALLEL:-10}"

#######################################
# Abort with exit status 2 (usage error) unless the value is a positive
# decimal integer without leading zeros. These values are later used in shell
# arithmetic and interpolated into the remote command line, so anything else
# (empty, zero, octal-looking, or arbitrary text) must be rejected up front.
# Arguments: $1 - variable name, used only in the error message
#            $2 - value to validate
# Outputs:   error message on stderr when the value is rejected
#######################################
require_positive_int() {
  local name=$1 value=$2
  if [[ ! "$value" =~ ^[1-9][0-9]*$ ]]; then
    printf 'ERROR: %s must be a positive integer (got: %s)\n' "$name" "$value" >&2
    exit 2
  fi
}

require_positive_int TURN_LIMIT "$TURN_LIMIT"
require_positive_int PER_CLAN_COUNT "$PER_CLAN_COUNT"
require_positive_int PARALLEL "$PARALLEL"

# Create the local artifact directory. Failing here is an environment problem,
# so exit 2 explicitly instead of letting `set -e` exit with mkdir's status
# (exit 1 is reserved for "gate failed").
mkdir -p -- "$LOCAL_DIR" || {
  echo "ERROR: cannot create local result dir: $LOCAL_DIR" >&2
  exit 2
}

# Canonical clan order. Seed offsets are computed as (index × PER_CLAN_COUNT) so
# a 10-seed-per-clan sweep yields disjoint ranges 1..10 / 11..20 / 21..30 / ...
# The order therefore determines which seed range each clan gets.
CLANS=(ironhold goldvein blackhammer deepforge runesmith)
readonly CLANS

#######################################
# Print the run banner: stamp, target host, sweep parameters, the total game
# count (number of clans × games per clan) and the remote result directory.
# Globals:   STAMP, AUTOPLAY_HOST, PER_CLAN_COUNT, TURN_LIMIT, PARALLEL,
#            CLANS, REMOTE_DIR (all read)
# Outputs:   seven lines on stdout
#######################################
print_banner() {
  local rule="============================================================"
  local clan_count=${#CLANS[@]}
  printf '%s\n' \
    "$rule" \
    "B5 Aggregation — $STAMP" \
    "Host: $AUTOPLAY_HOST" \
    "Per-clan: $PER_CLAN_COUNT seeds, turn_limit=$TURN_LIMIT, PARALLEL=$PARALLEL" \
    "Total games: $(( clan_count * PER_CLAN_COUNT )) across $clan_count clans" \
    "Remote results: $REMOTE_DIR" \
    "$rule"
}

print_banner

# ── Sweep each clan into the shared parent dir ──────────────────────────────

#######################################
# Run one clan's batch of games on the RUN host.
# Globals:   AUTOPLAY_HOST, PROJECT_ROOT_REMOTE, REMOTE_DIR, PARALLEL,
#            PER_CLAN_COUNT, TURN_LIMIT (all read)
# Arguments: $1 - clan id, passed to the batch as AI_PIN_PERSONALITY
#            $2 - seed offset, passed to the batch as SEED_OFFSET
# Outputs:   the batch's stdout+stderr are written on the RUN host to
#            $REMOTE_DIR/sweep_<clan>.log
# Returns:   ssh's exit status (the remote command's status, or ssh's own
#            error status when the connection fails)
#######################################
run_clan_sweep() {
  local clan=$1 offset=$2
  local fmt remote_cmd
  # The remote steps are chained with && so a failed mkdir/cd stops the run
  # instead of launching the batch from the wrong directory. Every value is
  # shell-quoted with %q so paths with spaces or quotes survive the remote
  # shell's parsing.
  fmt='mkdir -p -- %q && cd -- %q && '
  fmt+='AI_USE_MCTS=true AI_PIN_PERSONALITY=%q SEED_OFFSET=%q PARALLEL=%q '
  fmt+='bash tools/autoplay-batch.sh %q %q %q > %q 2>&1'
  # shellcheck disable=SC2059 — fmt is the fixed template assembled above
  printf -v remote_cmd "$fmt" \
    "$REMOTE_DIR" "$PROJECT_ROOT_REMOTE" \
    "$clan" "$offset" "$PARALLEL" \
    "$PER_CLAN_COUNT" "$TURN_LIMIT" "$REMOTE_DIR" \
    "$REMOTE_DIR/sweep_${clan}.log"
  ssh "$AUTOPLAY_HOST" "$remote_cmd"
}

idx=0
for clan in "${CLANS[@]}"; do
  # Clan at position idx owns seeds offset+1 .. offset+PER_CLAN_COUNT.
  offset=$(( idx * PER_CLAN_COUNT ))
  echo ""
  echo "[$(date +%H:%M:%S)] Sweep $((idx + 1))/${#CLANS[@]}: clan=$clan seeds=$((offset + 1))..$((offset + PER_CLAN_COUNT))"
  if ! run_clan_sweep "$clan" "$offset"; then
    echo "ERROR: sweep $clan failed — see $REMOTE_DIR/sweep_${clan}.log on $AUTOPLAY_HOST" >&2
    # Best-effort: pull the failing log for local inspection. The copy may
    # itself fail (e.g. host unreachable), which must not mask the exit code.
    scp "$AUTOPLAY_HOST:$REMOTE_DIR/sweep_${clan}.log" "$LOCAL_DIR/" 2>/dev/null || true
    exit 2
  fi
  idx=$((idx + 1))
done

# ── Aggregate via autoplay-report (per-clan table) + gate ───────────────────
echo ""
# The game count is derived from the sweep parameters rather than hard-coded,
# so the message stays correct when PER_CLAN_COUNT is overridden.
echo "[$(date +%H:%M:%S)] Aggregating $(( ${#CLANS[@]} * PER_CLAN_COUNT )) games — autoplay-report.py..."

# On the RUN host: stdout of the report goes to autoplay-report.csv, stderr to
# autoplay-report.summary, both inside the run's result directory. `cd` is
# chained with && so the report never runs from the wrong directory.
printf -v report_cmd 'cd -- %q && python3 tools/autoplay-report.py %q > %q 2> %q' \
  "$PROJECT_ROOT_REMOTE" "$REMOTE_DIR" \
  "$REMOTE_DIR/autoplay-report.csv" "$REMOTE_DIR/autoplay-report.summary"

# A report failure is a tooling/sweep problem, not a gate verdict: exit 2
# instead of letting `set -e` propagate the remote status (often 1, which is
# reserved for "gate failed").
ssh "$AUTOPLAY_HOST" "$report_cmd" || {
  echo "ERROR: autoplay-report.py failed — see $REMOTE_DIR/autoplay-report.summary on $AUTOPLAY_HOST" >&2
  exit 2
}

echo "[$(date +%H:%M:%S)] Running personality_win_balance gate..."

# On the RUN host: the gate's stdout becomes verdict.json and its stderr goes
# to gate.stderr, both inside the run's result directory. `cd` is chained with
# && so the gate never runs from the wrong directory.
printf -v gate_cmd \
  'cd -- %q && python3 tools/checklist-report.py personality_win_balance %q > %q 2> %q' \
  "$PROJECT_ROOT_REMOTE" "$REMOTE_DIR" \
  "$REMOTE_DIR/verdict.json" "$REMOTE_DIR/gate.stderr"

# A non-zero status is an expected outcome here (it is the gate verdict), so
# capture it with `||` rather than letting `set -e` abort the script.
gate_status=0
ssh "$AUTOPLAY_HOST" "$gate_cmd" || gate_status=$?

# ── Fetch the small artifacts back ──────────────────────────────────────────

#######################################
# Copy one artifact from the remote result directory into LOCAL_DIR.
# Best-effort by design: scp's own stderr is discarded and a failed copy only
# produces a warning, so one missing file never aborts the run.
# Globals:   AUTOPLAY_HOST, REMOTE_DIR, LOCAL_DIR (all read)
# Arguments: $1 - artifact file name inside REMOTE_DIR
# Outputs:   "WARN: could not fetch <name>" on stderr when the copy fails
# Returns:   0 in both cases
#######################################
fetch_artifact() {
  local name=$1
  scp "$AUTOPLAY_HOST:$REMOTE_DIR/$name" "$LOCAL_DIR/" 2>/dev/null ||
    echo "WARN: could not fetch $name" >&2
}

for f in verdict.json gate.stderr autoplay-report.csv autoplay-report.summary; do
  fetch_artifact "$f"
done

# Final summary, then map the gate's status onto this script's exit codes.
echo ""
echo "============================================================"
echo "Gate exit: $gate_status"
echo "Local verdict: $LOCAL_DIR/verdict.json"
echo "============================================================"

case "$gate_status" in
  0)
    echo "B5 PASS — p0-02 acceptance citable from this run."
    exit 0
    ;;
  255)
    # ssh itself exits 255 when the connection or session fails, so the gate
    # may not have run at all. That is an environment error (exit 2), not a
    # gate verdict (exit 1).
    echo "B5 ERROR — ssh to $AUTOPLAY_HOST failed (exit 255); gate result unknown." >&2
    exit 2
    ;;
  *)
    echo "B5 FAIL — see $LOCAL_DIR/verdict.json for reasons." >&2
    exit 1
    ;;
esac