magicciv/tools/matchup-grid.sh
Natalie 7ad46c48c1 feat(@projects/@magic-civilization): update ai progression difficulty metrics
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-04-18 21:10:17 -07:00

143 lines
4.9 KiB
Bash
Executable file
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
# matchup-grid.sh — 1v1 balanced-matchup grid across all 5 clan personalities.
#
# Runs every unordered pair of clans (C(5,2) = 10 pairs) through a seeded
# autoplay batch with `AI_PIN_PERSONALITY` pinned on slot 1 (the AI opponent;
# slot 0 is the heuristic-only human). Each pair runs `COUNT` seeds at
# `TURN_LIMIT` turns, so the full grid is 10 × COUNT games.
#
# The verdict is that win rates across the grid are BALANCED — no clan
# dominates, no clan is shut out. The `matchup_balance` checker in
# `checklist-report.py` enforces the precise threshold.
#
# This harness is the prerequisite gate for the "ultimate test"
# (`tools/huge-map-5clan.sh`): per the project owner, we don't run the
# 5-clan huge-map AI-only game until the 1v1 matchup grid shows the clans
# are balanced on equal footing.
#
# Usage:
#   tools/matchup-grid.sh                 # defaults: COUNT=5 TURN_LIMIT=300 PARALLEL=4
#   COUNT=10 tools/matchup-grid.sh        # override via env
#   tools/matchup-grid.sh --help
#
# Output layout:
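#   .local/iter/matchup-grid-<stamp>/
#     <clan_a>_vs_<clan_b>/            (10 pairs)
#       as_<clan>.log                  (captured output of each half-batch)
#       as_<clan>/                     (directory handed to autoplay-batch.sh)
#         game_<stamp>_seed<N>/        (COUNT games per pair in total)
#           turn_stats.jsonl
#           meta.json
#     verdict.json                     (matchup_balance gate output)
#     gate.stderr                      (stderr of the matchup_balance gate)
#     summary.md                       (human-readable rollup)
#     completion.marker                (per-pair exit codes, gate_exit, finished, parent)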
#
# Environment:
#   COUNT          — games per pair (default: 5)
#   TURN_LIMIT     — per-game turn cap (default: 300)
#   PARALLEL       — concurrent seeds per pair sweep (default: 4)
#   SEED_BASE      — starting seed (default: 0; pair i offsets by i*100)
#   MATCHUP_OUTPUT — output directory override (default: .local/iter/matchup-grid-<stamp>)
# No `-e` here: the exit status of each batch and of the verdict gate is read
# from $? and recorded further down, so a failing child must not abort the run.
set -uo pipefail

# ANSI colour escapes, interpreted by the `echo -e` calls in this script.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly DIM='\033[2m'
readonly NC='\033[0m'

# Tunables; a value already present in the environment wins over the default.
: "${COUNT:=5}"
: "${TURN_LIMIT:=300}"
: "${PARALLEL:=4}"
: "${SEED_BASE:=0}"

# require_uint NAME VALUE
#   Exit with status 2 and a message on stderr unless VALUE is a non-empty
#   string of decimal digits. NAME is only used in the message.
require_uint() {
  local name=$1 value=$2
  if [[ ! "$value" =~ ^[0-9]+$ ]]; then
    printf 'matchup-grid: %s must be a non-negative integer (got %q)\n' \
      "$name" "$value" >&2
    exit 2
  fi
}

# Reject malformed tunables before any work is done, and force base 10 so a
# value such as "08" is not later misread as octal inside $(( )) arithmetic.
for knob in COUNT TURN_LIMIT PARALLEL SEED_BASE; do
  require_uint "$knob" "${!knob}"
  printf -v "$knob" '%d' "$((10#${!knob}))"
done
unset knob
# print_header_help FILE
#   Print the leading comment block of FILE as help text: the first contiguous
#   run of lines that are either a bare "#" or start with "# ", with that
#   prefix removed. A shebang line (or anything else) before the block is
#   skipped, and output stops at the first line after the block, so comments
#   further down in the script are not shown. Uses only POSIX awk features.
print_header_help() {
  awk '
    /^#( |$)/ { in_header = 1; sub(/^# ?/, ""); print; next }
    in_header { exit }
  ' "$1"
}

# The only accepted argument is --help / -h; anything else is a usage error
# (exit 2). Configuration is passed through environment variables instead.
for arg in "$@"; do
  case "$arg" in
    --help | -h)
      print_header_help "$0"
      exit 0
      ;;
    *)
      echo "Unknown argument: $arg" >&2
      exit 2
      ;;
  esac
done
# Repository root = parent of the directory that holds this script.
REPO_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
if [[ -z "$REPO_ROOT" ]]; then
  echo "matchup-grid: cannot resolve repository root from ${BASH_SOURCE[0]}" >&2
  exit 1
fi

# Local-time stamp that makes the default output directory unique per run.
STAMP="$(date +%Y%m%d_%H%M%S)"

# MATCHUP_OUTPUT overrides the output dir (used by apricot-run.sh to direct
# output to $RESULTS_ABS/matchup-grid/ instead of the scratch .local/iter/).
PARENT="${MATCHUP_OUTPUT:-$REPO_ROOT/.local/iter/matchup-grid-$STAMP}"

# Everything below writes under $PARENT, so stop right away if it cannot be
# created instead of running the whole grid with nowhere to put results.
if ! mkdir -p -- "$PARENT"; then
  echo "matchup-grid: cannot create output directory: $PARENT" >&2
  exit 1
fi
# The five clan personalities, in the order used to enumerate matchups.
CLANS=(ironhold goldvein blackhammer deepforge runesmith)

# Build every unordered pair as "<first>_vs_<second>", where <first> always
# appears earlier in CLANS than <second>.
PAIRS=()
clan_total=${#CLANS[@]}
first=0
while ((first < clan_total)); do
  second=$((first + 1))
  while ((second < clan_total)); do
    PAIRS+=("${CLANS[first]}_vs_${CLANS[second]}")
    second=$((second + 1))
  done
  first=$((first + 1))
done
# Banner: grid dimensions (pairs × seeds × turn cap), the output directory,
# and the full list of pairs that will be run.
echo -e "${BLUE}matchup-grid${NC}: ${#PAIRS[@]} pairs × ${COUNT} seeds × T${TURN_LIMIT}"
echo -e "${DIM}parent: $PARENT${NC}"
echo -e "${DIM}pairs: ${PAIRS[*]}${NC}"

# completion.marker is appended to as the run progresses; start it empty.
# Without a writable marker no exit code could be recorded, so give up early.
MARKER="$PARENT/completion.marker"
if ! : > "$MARKER"; then
  echo "matchup-grid: cannot write completion marker: $MARKER" >&2
  exit 1
fi
# run_half_batch CLAN GAMES SEED_OFFSET PAIR_DIR
#   Run tools/autoplay-batch.sh for GAMES games at TURN_LIMIT turns with CLAN
#   exported as AI_PIN_PERSONALITY, SEED_OFFSET as SEED_OFFSET and the global
#   PARALLEL as PARALLEL. PAIR_DIR/as_CLAN is passed as the batch's third
#   argument; the batch's stdout and stderr go to PAIR_DIR/as_CLAN.log.
#   Returns the batch's exit status. When GAMES is 0 (the first half when
#   COUNT=1) nothing is launched and 0 is returned.
run_half_batch() {
  local clan=$1 games=$2 seed_offset=$3 pair_dir=$4
  if ((games == 0)); then
    return 0
  fi
  AI_PIN_PERSONALITY="$clan" \
    SEED_OFFSET="$seed_offset" \
    PARALLEL="$PARALLEL" \
    bash "$REPO_ROOT/tools/autoplay-batch.sh" "$games" "$TURN_LIMIT" \
    "$pair_dir/as_${clan}" > "$pair_dir/as_${clan}.log" 2>&1
}

# Pair windows start 100 seeds apart, so they are only disjoint while COUNT
# stays within 100. Say so loudly rather than letting windows overlap silently.
if ((COUNT > 100)); then
  echo -e "${YELLOW}warning:${NC} COUNT=${COUNT} exceeds the 100-seed window per pair; seed windows of consecutive pairs will overlap" >&2
fi

pair_idx=0
for pair in "${PAIRS[@]}"; do
  # Split "<clan_a>_vs_<clan_b>" back into its two clan names.
  clan_a="${pair%%_vs_*}"
  clan_b="${pair##*_vs_}"
  pair_dir="$PARENT/$pair"
  mkdir -p "$pair_dir"

  # Each pair gets its own seed window (100 apart) so seeds don't collide
  # across pairs, which keeps determinism-compare usable later.
  offset=$((SEED_BASE + pair_idx * 100))

  # Half the games: clan_a on slot 1 (AI opponent). Other half: clan_b.
  # This keeps positional fairness — the "who's AI vs who's heuristic"
  # question doesn't bias the grid. With an odd COUNT the extra game goes
  # to the clan_b half.
  half=$((COUNT / 2))
  second_half=$((COUNT - half))

  # pair_idx is 0-based for the seed offset; show a 1-based position.
  echo -e "${YELLOW}[$((pair_idx + 1))/${#PAIRS[@]}]${NC} $pair (seeds $((offset + 1))..$((offset + COUNT)))"

  # Batch with clan_a as AI
  run_half_batch "$clan_a" "$half" "$offset" "$pair_dir"
  a_rc=$?

  # Batch with clan_b as AI, continuing the seed window where clan_a stopped
  run_half_batch "$clan_b" "$second_half" "$((offset + half))" "$pair_dir"
  b_rc=$?

  # Record both exit codes; a failed batch does not stop the grid.
  printf 'pair_%s_as_%s_exit=%d\n' "$pair" "$clan_a" "$a_rc" >> "$MARKER"
  printf 'pair_%s_as_%s_exit=%d\n' "$pair" "$clan_b" "$b_rc" >> "$MARKER"
  pair_idx=$((pair_idx + 1))
done
# Grid-wide verdict: run the matchup_balance check from checklist-report.py
# over the whole output directory. Its stdout becomes verdict.json, its stderr
# goes to gate.stderr, and its exit status becomes this script's exit status.
echo -e "${BLUE}computing matchup_balance verdict…${NC}"
python3 "$REPO_ROOT/tools/checklist-report.py" matchup_balance "$PARENT" \
  1> "$PARENT/verdict.json" 2> "$PARENT/gate.stderr"
gate_rc=$?

# Close out the completion marker: gate status, UTC finish time, output dir.
finished_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
printf 'gate_exit=%d\nfinished=%s\nparent=%s\n' \
  "$gate_rc" "$finished_at" "$PARENT" >> "$MARKER"

case "$gate_rc" in
  0)
    echo -e "${GREEN}matchup_balance: PASS${NC}"
    ;;
  *)
    echo -e "${RED}matchup_balance: FAIL${NC} (gate_exit=$gate_rc)"
    echo -e "${DIM}see: $PARENT/verdict.json${NC}"
    ;;
esac
echo -e "${DIM}completion.marker: $MARKER${NC}"
exit "$gate_rc"