magicciv/tools/matchup-grid.sh
Natalie a9b8e23ae7 feat(@projects/@magic-civilization): add per-slot personality pinning via env vars
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-04-25 02:13:01 -07:00

147 lines
5.2 KiB
Bash
Executable file
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
# matchup-grid.sh — 1v1 balanced-matchup grid across all 5 clan personalities.
#
# Runs every unordered pair of clans (C(5,2) = 10 pairs) through seeded
# autoplay batches with both slots pinned via `AI_PIN_PERSONALITY_P0` and
# `AI_PIN_PERSONALITY_P1`. Each pair runs `COUNT` seeds at `TURN_LIMIT`
# turns, split into two batches with the slot order swapped to remove
# positional bias, so the full grid is 10 × COUNT games.
#
# The verdict is that win rates across the grid are BALANCED — no clan
# dominates, no clan is shut out. The `matchup_balance` checker in
# `checklist-report.py` enforces the precise threshold.
#
# This harness is the prerequisite gate for the "ultimate test"
# (`tools/huge-map-5clan.sh`): per the project owner, we don't run the
# 5-clan huge-map AI-only game until the 1v1 matchup grid shows the clans
# are balanced on equal footing.
#
# Usage:
# tools/matchup-grid.sh # defaults: COUNT=5 TURN_LIMIT=300 PARALLEL=4
# COUNT=10 tools/matchup-grid.sh # override via env
# tools/matchup-grid.sh --help
#
# Output layout:
# .local/iter/matchup-grid-<stamp>/
# <clan_a>_vs_<clan_b>/ (10 pairs)
# game_<stamp>_seed<N>/ (COUNT games each)
# turn_stats.jsonl
# meta.json
# verdict.json (matchup_balance gate output)
# summary.md (human-readable rollup)
# completion.marker (per-batch exit codes, gate_exit, finished timestamp, parent path)
#
# Environment:
# COUNT — games per pair (default: 5)
# TURN_LIMIT — per-game turn cap (default: 300)
# PARALLEL — concurrent seeds per pair sweep (default: 4)
# SEED_BASE — starting seed (default: 0; pair i offsets by i*100)
# MATCHUP_OUTPUT — output directory override (default: .local/iter/matchup-grid-<stamp> under the repo root)
# Abort on unset variables and let a pipeline fail when any stage fails.
# errexit (-e) is not enabled: the exit status of each batch and of the gate
# is read from `$?` further down instead.
set -uo pipefail

# ANSI colour escapes, stored as literal backslash sequences and rendered by
# `echo -e` at the call sites.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
DIM='\033[2m'
NC='\033[0m'

# Tunables: keep a value already present in the environment, else default.
: "${COUNT:=5}"
: "${TURN_LIMIT:=300}"
: "${PARALLEL:=4}"
: "${SEED_BASE:=0}"

# is_uint VALUE
# Succeeds when VALUE is a canonical non-negative decimal integer (no sign,
# no leading zeros, so arithmetic expansion never reads it as octal).
is_uint() {
    local canonical_uint='^(0|[1-9][0-9]*)$'
    [[ "$1" =~ $canonical_uint ]]
}

# These values are used in $(( )) arithmetic later on; reject anything that
# is not a plain integer up front with a clear message (exit 2 = usage error).
for tunable in COUNT TURN_LIMIT PARALLEL; do
    if ! is_uint "${!tunable}"; then
        echo "$tunable must be a non-negative integer (got '${!tunable}')" >&2
        exit 2
    fi
done
# SEED_BASE is only ever added to, so a leading minus sign is tolerated.
if ! is_uint "${SEED_BASE#-}"; then
    echo "SEED_BASE must be an integer (got '$SEED_BASE')" >&2
    exit 2
fi
unset tunable
# print_usage [FILE]
# Prints every line of FILE (default: this script) that starts with "# " or
# is a bare "#", with the comment marker and one following space removed.
# The shebang line is not matched because "#!" is neither form.
# Uses `sed -E` rather than the GNU-only `\?` BRE extension so the help text
# is also stripped correctly by non-GNU sed implementations.
print_usage() {
    local src="${1:-$0}"
    grep -E '^#( |$)' "$src" | sed -E 's/^# ?//'
}

# The only recognised flags are --help / -h; anything else is a usage error
# reported on stderr with exit status 2.
for arg in "$@"; do
    case "$arg" in
        --help | -h)
            print_usage
            exit 0
            ;;
        *)
            echo "Unknown argument: $arg" >&2
            exit 2
            ;;
    esac
done
# Repository root = parent of the directory that holds this script.
# errexit is off, so a failed `cd` must be caught here: an empty REPO_ROOT
# would make the default PARENT resolve under "/". CDPATH is cleared for the
# subshell so `cd` cannot jump elsewhere or print a path.
if ! REPO_ROOT="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"; then
    echo "matchup-grid: cannot resolve repository root" >&2
    exit 1
fi

# Local-time stamp used to name the default output directory.
STAMP="$(date +%Y%m%d_%H%M%S)"

# MATCHUP_OUTPUT overrides the output dir (used by apricot-run.sh to direct
# output to $RESULTS_ABS/matchup-grid/ instead of the scratch .local/iter/).
PARENT="${MATCHUP_OUTPUT:-$REPO_ROOT/.local/iter/matchup-grid-$STAMP}"
if ! mkdir -p "$PARENT"; then
    echo "matchup-grid: cannot create output directory: $PARENT" >&2
    exit 1
fi
# Clan roster; the grid covers every unordered pair drawn from it.
CLANS=(ironhold goldvein blackhammer deepforge runesmith)

# PAIRS collects "<first>_vs_<second>" labels, where <first> always precedes
# <second> in CLANS order: each clan is paired with every clan after it.
PAIRS=()
roster_size=${#CLANS[@]}
lead=0
while (( lead < roster_size - 1 )); do
    for rival in "${CLANS[@]:lead + 1}"; do
        PAIRS+=("${CLANS[lead]}_vs_${rival}")
    done
    lead=$((lead + 1))
done
# Announce the run: grid size, output directory and the pair labels.
# A ": " separator keeps the pair count from running into the tool name.
echo -e "${BLUE}matchup-grid${NC}: ${#PAIRS[@]} pairs × ${COUNT} seeds × T${TURN_LIMIT}"
echo -e "${DIM}parent: $PARENT${NC}"
echo -e "${DIM}pairs: ${PAIRS[*]}${NC}"

# Start from an empty completion marker; later steps only append to it.
# errexit is off, so stop here if the marker cannot be created.
MARKER="$PARENT/completion.marker"
if ! : > "$MARKER"; then
    echo "matchup-grid: cannot write completion marker: $MARKER" >&2
    exit 1
fi
# run_slot_batch SLOT0_CLAN SLOT1_CLAN SEED_OFFSET GAMES OUT_PREFIX
# Runs tools/autoplay-batch.sh for GAMES games at TURN_LIMIT turns with
# SLOT0_CLAN pinned to slot 0 and SLOT1_CLAN pinned to slot 1.
#   Globals:  REPO_ROOT, TURN_LIMIT, PARALLEL (read)
#   Outputs:  OUT_PREFIX is handed to the batch script as its third argument;
#             the batch's stdout and stderr are captured in OUT_PREFIX.log
#   Returns:  the exit status of the batch script
run_slot_batch() {
    local slot0_clan="$1" slot1_clan="$2" seed_offset="$3" games="$4" out_prefix="$5"
    # The env assignments apply to this one command only.
    AI_PIN_PERSONALITY_P0="$slot0_clan" \
    AI_PIN_PERSONALITY_P1="$slot1_clan" \
    SEED_OFFSET="$seed_offset" \
    PARALLEL="$PARALLEL" \
        bash "$REPO_ROOT/tools/autoplay-batch.sh" "$games" "$TURN_LIMIT" \
        "$out_prefix" > "$out_prefix.log" 2>&1
}

# Width of the seed window reserved for each pair.
seed_stride=100
if (( COUNT > seed_stride )); then
    # Windows are spaced seed_stride apart, so more than seed_stride games
    # per pair spill into the next pair's window. Warn instead of changing
    # the spacing so existing seed assignments stay reproducible.
    echo -e "${YELLOW}warning:${NC} COUNT=$COUNT exceeds the per-pair seed window of $seed_stride; seed ranges of adjacent pairs will overlap" >&2
fi

pair_idx=0
for pair in "${PAIRS[@]}"; do
    # Labels have the form "<clan_a>_vs_<clan_b>".
    clan_a="${pair%%_vs_*}"
    clan_b="${pair##*_vs_}"
    pair_dir="$PARENT/$pair"
    mkdir -p "$pair_dir"

    # Each pair gets a disjoint seed window so seeds don't collide across
    # pairs, which keeps determinism-compare usable later.
    offset=$((SEED_BASE + pair_idx * seed_stride))

    # Per-slot pinning: clan_a in slot 0, clan_b in slot 1 for one half;
    # swap positions for the other half to remove positional bias.
    # AI_PIN_PERSONALITY_P{N} (added with personality_assigner.gd per-slot
    # support) overrides the is_human guard so BOTH players' clan_id is set
    # in meta.json — matchup_balance verdict can attribute every win.
    # Integer division: an odd COUNT gives the extra game to the swapped
    # batch, and COUNT=1 asks the first batch for 0 games.
    # NOTE(review): confirm autoplay-batch.sh accepts a game count of 0.
    half=$((COUNT / 2))
    second_half=$((COUNT - half))

    # Progress is reported 1-based so the last pair reads N/N.
    echo -e "${YELLOW}[$((pair_idx + 1))/${#PAIRS[@]}]${NC} $pair (seeds $((offset + 1))..$((offset + COUNT)))"

    # Batch with clan_a in slot 0, clan_b in slot 1
    run_slot_batch "$clan_a" "$clan_b" "$offset" "$half" "$pair_dir/as_${clan_a}"
    a_rc=$?

    # Batch with clan_b in slot 0, clan_a in slot 1 (positional swap)
    run_slot_batch "$clan_b" "$clan_a" "$((offset + half))" "$second_half" "$pair_dir/as_${clan_b}"
    b_rc=$?

    # Record both exit codes; a failed batch does not stop the grid.
    printf 'pair_%s_as_%s_exit=%d\n' "$pair" "$clan_a" "$a_rc" >> "$MARKER"
    printf 'pair_%s_as_%s_exit=%d\n' "$pair" "$clan_b" "$b_rc" >> "$MARKER"
    pair_idx=$((pair_idx + 1))
done
# Run the matchup_balance gate over the whole grid via checklist-report.py.
# The checker's stdout becomes verdict.json; its stderr goes to gate.stderr.
echo -e "${BLUE}computing matchup_balance verdict…${NC}"
gate_rc=0
python3 "$REPO_ROOT/tools/checklist-report.py" matchup_balance "$PARENT" \
    > "$PARENT/verdict.json" 2> "$PARENT/gate.stderr" || gate_rc=$?

# Close out the completion marker: gate status, UTC finish time, output dir.
{
    printf 'gate_exit=%d\n' "$gate_rc"
    printf 'finished=%s\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
    printf 'parent=%s\n' "$PARENT"
} >> "$MARKER"

# Report the outcome; the script's own exit status mirrors the gate's.
case "$gate_rc" in
    0)
        echo -e "${GREEN}matchup_balance: PASS${NC}"
        ;;
    *)
        echo -e "${RED}matchup_balance: FAIL${NC} (gate_exit=$gate_rc)"
        echo -e "${DIM}see: $PARENT/verdict.json${NC}"
        ;;
esac
echo -e "${DIM}completion.marker: $MARKER${NC}"
exit "$gate_rc"