magicciv/tools/huge-map-5clan.sh
Natalie 2c2c1e4ef5 feat(@projects): define tree component types
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-05-03 13:31:15 -04:00

124 lines
5.1 KiB
Bash
Executable file
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
# huge-map-5clan.sh — THE "ultimate test". 5 AI clan personalities compete
# on a map sized for 8 players, stressing the AI lookahead pipeline
# end-to-end.
#
# Per project owner: this test should only run AFTER the 1v1 matchup grid
# (`tools/matchup-grid.sh`) has shown clans are balanced in head-to-head
# play.
#
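# "huge" in the script name is aspirational: MAP_SIZE currently defaults to
# "standard" (see the defaults below). Whatever MAP_SIZE is set to must match
# a map-size id in setup.json; dimensions and max_players are read from that
# data file, so if an id's capacity changes there, this harness picks it up
# automatically.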
#
# Acceptance criteria (validated via `checklist-report.py ultimate_stress`):
# - All 5 clans appear in at least one of the SEEDS runs
# - Victory rate ≥ SEEDS/2 (games decisive — MCTS not stalling)
# - Winner distribution non-degenerate: ≥2 distinct clans win across grid
# - Median game length ≥ TURN_LIMIT*0.4
#
# Usage:
# tools/huge-map-5clan.sh # defaults SEEDS=10 TURN_LIMIT=500 PARALLEL=4
# SEEDS=20 tools/huge-map-5clan.sh
# tools/huge-map-5clan.sh --help
#
# Output layout:
# .local/iter/huge-map-5clan-<stamp>/
# game_<stamp>_seed<N>/ (SEEDS games, 5 AI clans each)
# verdict.json
# completion.marker
# Unset variables are errors and a pipeline fails when any stage fails.
# `-e` is not enabled: exit codes are captured explicitly via `$?` later on.
set -u
set -o pipefail

# ANSI colour escapes, stored as literal backslash sequences and rendered by
# `echo -e` at print time.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
DIM='\033[2m'
NC='\033[0m'

# Tunables: a non-empty value from the environment wins, otherwise the
# default on the right is used.
SEEDS="${SEEDS:-10}"
TURN_LIMIT="${TURN_LIMIT:-500}"
PARALLEL="${PARALLEL:-4}"

# Civ5 "Standard" = 80×52, max 8 players — the smallest map that fits the
# user's "huge map that 8 COULD play on" intent. Our own "huge" (128×80,
# 12-player) is a stretch goal; switch to MAP_SIZE=huge once POD's
# MAX_PLAYERS=4 limit is lifted and the game supports >8 AI slots.
MAP_SIZE="${MAP_SIZE:-standard}"
NUM_PLAYERS="${NUM_PLAYERS:-5}"

# p1-22: bound MCTS per-decision wall-clock cost; 2000 ms caps each AI
# decision.
# NOTE(review): the earlier estimate recorded here (~5s/turn × 5 players ×
# 500 turns ≈ 3.5 hr per game) is roughly 12,500 s, which does NOT fit inside
# a 3600 s safety timeout — confirm the effective timeout (see
# SAFETY_TIMEOUT_OVERRIDE) before relying on slow seeds finishing.
MCTS_DECISION_BUDGET_MS="${MCTS_DECISION_BUDGET_MS:-2000}"
#######################################
# Print the help text: the comment block that opens a script file.
# The shebang on line 1 is skipped, as are blank lines before the first
# comment. Output stops at the first line that is not a `#` / `# ...` comment,
# so comments further down the script never leak into --help.
# A leading "# " (or a bare "#") is stripped from each printed line.
# Uses awk rather than sed's `\?`, which is a GNU-only BRE extension.
# Arguments: $1 - path of the script to read (defaults to $0)
# Outputs:   help text on stdout
#######################################
print_header_help() {
  local script_path="${1:-$0}"
  awk '
    NR == 1 && /^#!/ { next }
    /^#( |$)/ { in_header = 1; sub(/^# ?/, ""); print; next }
    !in_header && /^[ \t]*$/ { next }
    { exit }
  ' "$script_path"
}

# Argument handling: --help/-h prints the header and exits 0; anything else
# is rejected with exit status 2. All tuning is done via environment
# variables, so no other arguments are accepted.
for arg in "$@"; do
  case "$arg" in
    --help|-h)
      print_header_help "$0"
      exit 0 ;;
    *)
      echo "Unknown argument: $arg" >&2
      exit 2 ;;
  esac
done
# Repository root = parent of the directory that holds this script.
# CDPATH is cleared for the subshell so `cd` cannot echo a path into the
# captured output.
REPO_ROOT="$(CDPATH= cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
if [[ -z "$REPO_ROOT" ]]; then
  # Without `set -e` a failed cd would otherwise leave REPO_ROOT empty and
  # every later path would silently resolve against "/".
  echo "ERROR: cannot resolve repository root from ${BASH_SOURCE[0]}" >&2
  exit 1
fi

# Timestamp used to give each run its own output directory.
STAMP="$(date +%Y%m%d_%H%M%S)"

# HUGE_OUTPUT overrides the output dir (used by apricot-run.sh).
PARENT="${HUGE_OUTPUT:-$REPO_ROOT/.local/iter/huge-map-5clan-$STAMP}"
if ! mkdir -p -- "$PARENT"; then
  # Stop here: every later step redirects into files under $PARENT.
  echo "ERROR: cannot create output directory: $PARENT" >&2
  exit 1
fi
#######################################
# Print the most recently modified matchup-grid-* directory under a given
# directory (no trailing slash). Prints nothing when there is none.
# Uses a glob plus `-nt` (mtime comparison) instead of parsing `ls -t`.
# Arguments: $1 - directory that holds the matchup-grid-* run directories
# Outputs:   the newest run directory on stdout, or nothing
#######################################
latest_matchup_grid_dir() {
  local iter_dir="$1" candidate newest=""
  for candidate in "$iter_dir"/matchup-grid-*/; do
    [[ -d "$candidate" ]] || continue   # an unmatched glob stays literal
    candidate="${candidate%/}"
    if [[ -z "$newest" || "$candidate" -nt "$newest" ]]; then
      newest="$candidate"
    fi
  done
  if [[ -n "$newest" ]]; then
    printf '%s\n' "$newest"
  fi
}

#######################################
# Print the "pass" field of a verdict JSON file as Python renders it
# ("True"/"False" for booleans; "False" when the key is absent).
# Prints "False" when the file cannot be read or parsed.
# The path is passed via argv, never spliced into the Python source, so
# quotes in the path cannot break or inject code.
# Arguments: $1 - path to verdict.json
#######################################
matchup_verdict_pass() {
  python3 -c 'import json, sys; print(json.load(open(sys.argv[1])).get("pass", False))' \
    "$1" 2>/dev/null || echo False
}

# Preflight (advisory only — it warns but never aborts): look at the most
# recent matchup-grid run, warn if it is older than 30 days, and report
# whether its verdict passes.
LATEST_MATCHUP_GRID="$(latest_matchup_grid_dir "$REPO_ROOT/.local/iter")"
if [[ -z "$LATEST_MATCHUP_GRID" ]]; then
  echo -e "${YELLOW}WARN: no matchup-grid run found.${NC}"
  echo -e "${DIM}Per project owner, 1v1 matchup balance should pass before running the ultimate test.${NC}"
  echo -e "${DIM}Run: tools/matchup-grid.sh${NC}"
  echo ""
else
  matchup_verdict="$LATEST_MATCHUP_GRID/verdict.json"
  # `find -prune -mtime -30` prints the directory only when its mtime is
  # less than 30 days old; empty output means the run is stale.
  if [[ -z "$(find "$LATEST_MATCHUP_GRID" -prune -mtime -30 -print 2>/dev/null)" ]]; then
    echo -e "${YELLOW}WARN: most recent matchup-grid run is more than 30 days old.${NC} ($LATEST_MATCHUP_GRID)"
  fi
  if [[ -f "$matchup_verdict" ]] && command -v python3 >/dev/null; then
    pass="$(matchup_verdict_pass "$matchup_verdict")"
    if [[ "$pass" == "True" ]]; then
      echo -e "${GREEN}prereq: matchup-grid verdict PASS${NC} ($LATEST_MATCHUP_GRID)"
    else
      echo -e "${YELLOW}WARN: most recent matchup-grid verdict is NOT passing.${NC}"
      echo -e "${DIM}$matchup_verdict${NC}"
    fi
  else
    # Previously this case printed nothing, hiding an unverifiable prereq.
    echo -e "${YELLOW}WARN: cannot read matchup-grid verdict (verdict.json missing or python3 unavailable).${NC}"
    echo -e "${DIM}$matchup_verdict${NC}"
  fi
fi
# Announce the run configuration and where its artefacts will be written.
banner="${BLUE}huge-map-5clan (ultimate stress)${NC}${SEEDS} seeds × T${TURN_LIMIT} × ${NUM_PLAYERS} AI on ${MAP_SIZE} map"
echo -e "$banner"
echo -e "${DIM}parent: $PARENT${NC}"

# Create (or truncate) the marker file up front; exit codes and the finish
# time are appended to it as the run progresses.
MARKER="$PARENT/completion.marker"
: > "$MARKER"

# Environment handed to the batch runner.
# All 5 slots are pinned to the 5 canonical clans so that meta.player_clans is
# fully populated for the ultimate_stress verdict. Without per-slot pinning,
# slot 0 (the human slot) gets an empty clan_id and its wins can't be
# attributed. Seed-driven map variation still drives strategic divergence.
batch_env=(
  MAP_SIZE="$MAP_SIZE"
  NUM_PLAYERS="$NUM_PLAYERS"
  PARALLEL="$PARALLEL"
  MCTS_DECISION_BUDGET_MS="${MCTS_DECISION_BUDGET_MS:-2000}"
  SAFETY_TIMEOUT_OVERRIDE="${SAFETY_TIMEOUT_OVERRIDE:-}"
  AI_USE_MCTS=true
  AI_PIN_PERSONALITY_P0=ironhold
  AI_PIN_PERSONALITY_P1=blackhammer
  AI_PIN_PERSONALITY_P2=goldvein
  AI_PIN_PERSONALITY_P3=deepforge
  AI_PIN_PERSONALITY_P4=runesmith
)

# Run the whole batch; stdout and stderr both go to batch.log. The exit code
# is recorded in the marker rather than aborting the script.
env "${batch_env[@]}" \
  bash "$REPO_ROOT/tools/autoplay-batch.sh" "$SEEDS" "$TURN_LIMIT" "$PARENT" \
  > "$PARENT/batch.log" 2>&1
batch_rc=$?
printf 'batch_exit=%d\n' "$batch_rc" >> "$MARKER"
# Compute the ultimate_stress verdict over the batch output. The report's
# stdout becomes verdict.json; its stderr is kept separately in gate.stderr.
echo -e "${BLUE}computing ultimate_stress verdict…${NC}"
python3 "$REPO_ROOT/tools/checklist-report.py" ultimate_stress "$PARENT" \
  > "$PARENT/verdict.json" 2> "$PARENT/gate.stderr"
gate_rc=$?

# Record the gate result, finish time (UTC) and output location in the marker.
printf 'gate_exit=%d\n' "$gate_rc" >> "$MARKER"
printf 'finished=%s\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" >> "$MARKER"
printf 'parent=%s\n' "$PARENT" >> "$MARKER"

if (( gate_rc == 0 )); then
  echo -e "${GREEN}ultimate_stress: PASS${NC}"
else
  echo -e "${RED}ultimate_stress: FAIL${NC} (gate_exit=$gate_rc)"
  echo -e "${DIM}see: $PARENT/verdict.json${NC}"
  # The gate's own error output goes to gate.stderr, not verdict.json; point
  # at it when it has content (e.g. the report crashed or python3 is missing).
  if [[ -s "$PARENT/gate.stderr" ]]; then
    echo -e "${DIM}see: $PARENT/gate.stderr${NC}"
  fi
  # Surface a failed batch run, which is otherwise only visible in the marker.
  if [[ "${batch_rc:-0}" != "0" ]]; then
    echo -e "${DIM}batch exited with ${batch_rc}; see: $PARENT/batch.log${NC}"
  fi
fi

# The script's exit status is the gate's exit status.
exit "$gate_rc"