magicciv/tools/matchup-grid.sh

#!/usr/bin/env bash
# matchup-grid.sh — 1v1 balanced-matchup grid across all 5 clan personalities.
#
# Runs every unordered pair of clans (C(5,2) = 10 pairs) through seeded
# autoplay batches with both player slots pinned via `AI_PIN_PERSONALITY_P0`
# and `AI_PIN_PERSONALITY_P1`. Each pair runs `COUNT` seeds at `TURN_LIMIT`
# turns, split into two batches with the slot assignment swapped to remove
# positional bias, so the full grid is 10 × COUNT games.
#
# The verdict is that win rates across the grid are BALANCED — no clan
# dominates, no clan is shut out. The `matchup_balance` checker in
# `checklist-report.py` enforces the precise threshold.
#
# This harness is the prerequisite gate for the "ultimate test"
# (`tools/huge-map-5clan.sh`): per the project owner, we don't run the
# 5-clan huge-map AI-only game until the 1v1 matchup grid shows the clans
# are balanced on equal footing.
#
# Usage:
#   tools/matchup-grid.sh            # defaults: COUNT=5 TURN_LIMIT=300 PARALLEL=4
#   COUNT=10 tools/matchup-grid.sh   # override via env
#   tools/matchup-grid.sh --help
#
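# Output layout:
#   .local/iter/matchup-grid-<stamp>/   (or $MATCHUP_OUTPUT when set)
#     <clan_a>_vs_<clan_b>/             (10 pairs)
#       as_<clan_a>.log                 (batch log: clan_a in slot 0)
#       as_<clan_b>.log                 (batch log: clan_b in slot 0)
#       as_<clan_a>/  as_<clan_b>/      (batch output dirs; COUNT games in total)
#         game_<stamp>_seed<N>/
#           turn_stats.jsonl
#           meta.json
#     verdict.json                      (matchup_balance gate stdout)
#     gate.stderr                       (matchup_balance gate stderr)
#     summary.md                        (human-readable rollup)
#     completion.marker                 (per-batch exit codes, gate_exit, finished, parent)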
#
# Environment:
#   COUNT          — games per pair (default: 5)
#   TURN_LIMIT     — per-game turn cap (default: 300)
#   PARALLEL       — concurrent seeds per pair sweep (default: 4)
#   SEED_BASE      — starting seed (default: 0; pair i offsets by i*100)
#   MATCHUP_OUTPUT — output directory override
#                    (default: .local/iter/matchup-grid-<stamp> under the repo root)
set -uo pipefail  # no -e: batch and gate exit codes are captured via $? instead

# ANSI colour escapes, stored as literal backslash sequences; they are
# expanded by `echo -e` at the places where messages are printed.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
DIM='\033[2m'
NC='\033[0m'

# Tunables: a value supplied through the environment wins, otherwise the
# default below is assigned.
: "${COUNT:=5}"
: "${TURN_LIMIT:=300}"
: "${PARALLEL:=4}"
: "${SEED_BASE:=0}"

#######################################
# Print the leading header comment block of a script as help text.
# Only the first contiguous run of `# ` / `#` lines is printed (the shebang
# is skipped), so comments further down the file never leak into --help.
# awk is used instead of `sed 's/^# \?//'` because `\?` is a GNU extension.
# Arguments: $1 - file to read (defaults to $0)
# Outputs:   the header text on stdout, with the leading "# " stripped
#######################################
print_usage() {
    awk '
        /^#!/     { next }
        /^#( |$)/ { sub(/^# ?/, ""); print; in_header = 1; next }
        in_header { exit }
    ' "${1:-$0}"
}

#######################################
# Abort with exit status 2 unless a setting is a plain decimal integer.
# Leading zeros are rejected because shell arithmetic would read them as octal.
# Arguments: $1 - setting name (used in the error message)
#            $2 - value to check
#            $3 - optional inclusive lower bound
# Outputs:   an error message on stderr when the check fails
#######################################
require_int() {
    local name=$1 value=$2 min=${3:-}
    local int_re='^-?(0|[1-9][0-9]*)$'

    if [[ ! $value =~ $int_re ]]; then
        echo "$name must be a decimal integer (got: '$value')" >&2
        exit 2
    fi
    if [[ -n $min ]] && (( value < min )); then
        echo "$name must be >= $min (got: $value)" >&2
        exit 2
    fi
}

for arg in "$@"; do
    case "$arg" in
        --help|-h)
            print_usage "$0"
            exit 0
            ;;
        *) echo "Unknown argument: $arg" >&2; exit 2 ;;
    esac
done

# Checked after argument parsing so --help still works with a bad environment.
# Only the values this script feeds into its own arithmetic are validated;
# TURN_LIMIT and PARALLEL are handed to autoplay-batch.sh unchanged.
require_int COUNT "$COUNT" 1
require_int SEED_BASE "$SEED_BASE"

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" || {
    # Without this guard an empty REPO_ROOT would place PARENT under "/".
    echo "matchup-grid: cannot resolve repository root from ${BASH_SOURCE[0]}" >&2
    exit 1
}
STAMP="$(date +%Y%m%d_%H%M%S)"
# MATCHUP_OUTPUT overrides the output dir (used by apricot-run.sh to direct
# output to $RESULTS_ABS/matchup-grid/ instead of the scratch .local/iter/).
PARENT="${MATCHUP_OUTPUT:-$REPO_ROOT/.local/iter/matchup-grid-$STAMP}"
mkdir -p -- "$PARENT" || {
    # errexit is off in this script, so stop here rather than let every later
    # redirection into $PARENT fail one by one.
    echo "matchup-grid: cannot create output directory: $PARENT" >&2
    exit 1
}

CLANS=(ironhold goldvein blackhammer deepforge runesmith)
PAIRS=()
for ((i = 0; i < ${#CLANS[@]}; i++)); do
    # Match clan i against every clan listed after it, so each unordered
    # pair appears exactly once, named "<earlier>_vs_<later>".
    for rival in "${CLANS[@]:i+1}"; do
        PAIRS+=("${CLANS[i]}_vs_${rival}")
    done
done

printf '%b\n' \
    "${BLUE}matchup-grid${NC} — ${#PAIRS[@]} pairs × ${COUNT} seeds × T${TURN_LIMIT}" \
    "${DIM}parent: $PARENT${NC}" \
    "${DIM}pairs: ${PAIRS[*]}${NC}"

MARKER="$PARENT/completion.marker"
: > "$MARKER"  # start from an empty marker; later steps only append to it

run_batch() {
    # Run one autoplay batch with both player slots pinned to a clan.
    #
    # AI_PIN_PERSONALITY_P{N} (added with personality_assigner.gd per-slot
    # support) overrides the is_human guard so BOTH players' clan_id is set
    # in meta.json — matchup_balance verdict can attribute every win.
    #
    # Arguments: $1 - clan pinned to slot 0
    #            $2 - clan pinned to slot 1
    #            $3 - SEED_OFFSET handed to the batch
    #            $4 - number of games in the batch
    #            $5 - batch output directory
    # Globals:   REPO_ROOT, TURN_LIMIT, PARALLEL (read)
    # Outputs:   stdout and stderr of the batch are written to "<$5>.log"
    # Returns:   the exit status of autoplay-batch.sh
    local slot0_clan=$1 slot1_clan=$2 seed_offset=$3 games=$4 out_dir=$5

    AI_PIN_PERSONALITY_P0="$slot0_clan" \
    AI_PIN_PERSONALITY_P1="$slot1_clan" \
    SEED_OFFSET=$seed_offset \
    PARALLEL=$PARALLEL \
    bash "$REPO_ROOT/tools/autoplay-batch.sh" "$games" "$TURN_LIMIT" \
        "$out_dir" > "${out_dir}.log" 2>&1
}

run_grid() {
    # Run both positional halves of every pair in PAIRS and append one
    # "pair_<pair>_as_<clan>_exit=<rc>" line per batch to the marker file.
    #
    # Globals: PAIRS, PARENT, MARKER, COUNT, SEED_BASE, YELLOW, NC (read)
    # Outputs: one progress line per pair on stdout
    local pair clan_a clan_b pair_dir offset half second_half a_rc b_rc
    local pair_idx=0

    for pair in "${PAIRS[@]}"; do
        clan_a="${pair%%_vs_*}"
        clan_b="${pair##*_vs_}"
        pair_dir="$PARENT/$pair"
        mkdir -p "$pair_dir"

        # Each pair gets its own seed window so seeds don't collide across
        # pairs, which keeps determinism-compare usable later. The stride is
        # fixed at 100, so windows only stay disjoint while COUNT <= 100.
        offset=$((SEED_BASE + pair_idx * 100))

        # clan_a sits in slot 0 for one half of the games and in slot 1 for
        # the other half, to remove positional bias. With an odd COUNT the
        # second (swapped) batch gets the extra game.
        half=$((COUNT / 2))
        second_half=$((COUNT - half))

        # pair_idx is zero-based (it drives the seed offset); the progress
        # counter is shown one-based so the last pair reads N/N, not N-1/N.
        echo -e "${YELLOW}[$((pair_idx + 1))/${#PAIRS[@]}]${NC} $pair (seeds $((offset + 1))..$((offset + COUNT)))"

        # Batch with clan_a in slot 0, clan_b in slot 1
        run_batch "$clan_a" "$clan_b" "$offset" "$half" \
            "$pair_dir/as_${clan_a}"
        a_rc=$?

        # Batch with clan_b in slot 0, clan_a in slot 1 (positional swap)
        run_batch "$clan_b" "$clan_a" "$((offset + half))" "$second_half" \
            "$pair_dir/as_${clan_b}"
        b_rc=$?

        # Failures are recorded, not fatal: the remaining pairs still run.
        printf 'pair_%s_as_%s_exit=%d\n' "$pair" "$clan_a" "$a_rc" >> "$MARKER"
        printf 'pair_%s_as_%s_exit=%d\n' "$pair" "$clan_b" "$b_rc" >> "$MARKER"

        pair_idx=$((pair_idx + 1))
    done
}

run_grid

# Grid-wide verdict: checklist-report.py is handed the whole grid directory;
# its stdout becomes verdict.json and its stderr is kept in gate.stderr.
printf '%b\n' "${BLUE}computing matchup_balance verdict…${NC}"
python3 "$REPO_ROOT/tools/checklist-report.py" matchup_balance "$PARENT" \
    > "$PARENT/verdict.json" 2> "$PARENT/gate.stderr"
gate_rc=$?

{
    # Marker trailer: gate status, UTC finish time, and the grid directory.
    printf 'gate_exit=%d\n' "$gate_rc"
    printf 'finished=%s\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
    printf 'parent=%s\n' "$PARENT"
} >> "$MARKER"

if (( gate_rc != 0 )); then
    printf '%b\n' \
        "${RED}matchup_balance: FAIL${NC} (gate_exit=$gate_rc)" \
        "${DIM}see: $PARENT/verdict.json${NC}"
else
    printf '%b\n' "${GREEN}matchup_balance: PASS${NC}"
fi

printf '%b\n' "${DIM}completion.marker: $MARKER${NC}"
# The script's own exit status is the gate's exit status.
exit "$gate_rc"