215 lines
7.7 KiB
Python
215 lines
7.7 KiB
Python
#!/usr/bin/env python3
|
||
"""Tests for matchup_balance and ultimate_stress verdicts in checklist-report.py.
|
||
|
||
Covers the two new gates added 2026-04-17 alongside `tools/matchup-grid.sh`
|
||
(C(5,2)=10 1v1 pairings) and `tools/huge-map-5clan.sh` (5-clan huge-map
|
||
ultimate AI lookahead stress test). Both functions consume
|
||
`list[tuple[int, dict]]` rows exactly like the pre-existing
|
||
`personality_win_balance_verdict`, so the fixture style mirrors
|
||
`tools/test_personality_winrate.py`.
|
||
|
||
Run: python3 tools/test_matchup_and_ultimate.py
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import importlib.util as _iu
|
||
import sys
|
||
from pathlib import Path
|
||
|
||
_TOOLS = Path(__file__).parent
|
||
|
||
|
||
def _load(name: str, stem: str):
    """Import ``<stem>.py`` from the ``_TOOLS`` directory as module *name*.

    The file is located by path instead of through an ``import`` statement,
    so stems that are not valid identifiers (for example ones containing a
    hyphen) can still be loaded.

    Args:
        name: Module name handed to the import spec.
        stem: File name without the ``.py`` suffix, resolved against ``_TOOLS``.

    Returns:
        The module object after its top-level code has been executed.

    Raises:
        ImportError: If no import spec or loader can be built for the path.
    """
    path = _TOOLS / f"{stem}.py"
    spec = _iu.spec_from_file_location(name, path)
    # Both the spec and its loader are optional in importlib's API; fail with
    # a clear message instead of an AttributeError on None.
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot build an import spec for {path}")
    mod = _iu.module_from_spec(spec)
    spec.loader.exec_module(mod)
    return mod
|
||
|
||
|
||
# The module under test lives in a hyphenated file name, so it is loaded by
# path through _load() rather than with a regular import statement.
cr = _load(name="checklist_report", stem="checklist-report")
|
||
|
||
|
||
def _row(outcome: str, clans: dict, winner_index: int = 1, turn: int = 200) -> dict:
    """Build one game-result row for the verdict fixtures.

    Args:
        outcome: Value stored under ``"outcome"`` (this file uses
            ``"victory"`` and ``"in_progress"``).
        clans: Mapping of player slot (as a string) to clan id. It is stored
            by reference under ``"player_clans"``, not copied.
        winner_index: Slot number of the winner; this file passes ``-1`` for
            rows without a winner.
        turn: Value stored under ``"turn"``.

    Returns:
        A dict with keys ``outcome``, ``winner_index``, ``winner_personality``,
        ``player_clans`` and ``turn``. ``winner_personality`` is the clan found
        at ``str(winner_index)`` in *clans*, or ``""`` when that slot is absent.
    """
    winner_clan = clans.get(str(winner_index), "")
    record = dict(
        outcome=outcome,
        winner_index=winner_index,
        winner_personality=winner_clan,
        player_clans=clans,
        turn=turn,
    )
    return record
|
||
|
||
|
||
def _grid_balanced_1v1() -> list:
    """Return ``(seed, row)`` tuples for an evenly split 1v1 grid.

    For each of the 10 unordered pairings of the five clans, four rows are
    emitted in this order: a victory for the first clan, a victory for the
    second clan, an ``in_progress`` row for the first clan and an
    ``in_progress`` row for the second clan. That gives 40 rows with seeds
    0..39. Every row lists the clan in slot ``"1"`` and an empty string in
    slot ``"0"``.
    """
    clan_names = ["ironhold", "goldvein", "blackhammer", "deepforge", "runesmith"]
    # (outcome, winner_index, turn) templates applied to both clans of a pairing.
    templates = (("victory", 1, 180), ("in_progress", -1, 300))
    games: list = []
    for idx, first in enumerate(clan_names):
        for second in clan_names[idx + 1:]:
            for outcome, winner, turn in templates:
                for clan in (first, second):
                    # The running row count doubles as the seed.
                    row = _row(outcome, {"0": "", "1": clan}, winner_index=winner, turn=turn)
                    games.append((len(games), row))
    return games
|
||
|
||
|
||
def _grid_dominant_clan() -> list:
    """Return ``(seed, row)`` tuples in which ironhold takes every victory.

    For each of the four other clans, ten pairs of rows are emitted: a
    victory row for ironhold followed by an ``in_progress`` row for that
    opponent. That gives 80 rows with seeds 0..79, 40 of them ironhold wins.
    """
    opponents = ("goldvein", "blackhammer", "deepforge", "runesmith")
    games: list = []
    for rival in opponents:
        for _ in range(10):
            win = _row("victory", {"0": "", "1": "ironhold"}, winner_index=1, turn=150)
            games.append((len(games), win))
            unfinished = _row("in_progress", {"0": "", "1": rival}, winner_index=-1, turn=300)
            games.append((len(games), unfinished))
    return games
|
||
|
||
|
||
def _grid_undersampled() -> list:
    """Return ``(seed, row)`` tuples with only two victory rows per clan.

    Ten rows in total (seeds 0..9): each of the five clans appears twice in
    slot ``"1"``, both times as the winner at turn 200.
    """
    clan_names = ("ironhold", "goldvein", "blackhammer", "deepforge", "runesmith")
    # Repeat every clan twice, back to back, and number the rows in order.
    doubled = (clan for clan in clan_names for _ in range(2))
    return [
        (seed, _row("victory", {"0": "", "1": clan}, winner_index=1, turn=200))
        for seed, clan in enumerate(doubled)
    ]
|
||
|
||
|
||
def _ultimate_balanced() -> list:
    """Return ten 5-clan victory rows whose winners are spread over four clans.

    The winner sequence gives ironhold and goldvein three wins each and
    blackhammer and deepforge two each; runesmith never wins. Every row is a
    ``"victory"`` at turn 350 and carries its own roster dict.
    """
    lineup = ("ironhold", "goldvein", "blackhammer", "deepforge", "runesmith")
    slot_of = {clan: slot for slot, clan in enumerate(lineup)}
    champions = [
        "ironhold", "goldvein", "blackhammer", "deepforge", "ironhold",
        "goldvein", "blackhammer", "ironhold", "deepforge", "goldvein",
    ]
    games: list = []
    for seed, champion in enumerate(champions):
        # A fresh roster per row, keyed by the slot number as a string.
        roster = {str(slot): clan for slot, clan in enumerate(lineup)}
        games.append(
            (seed, _row("victory", roster, winner_index=slot_of[champion], turn=350))
        )
    return games
|
||
|
||
|
||
def _ultimate_degenerate_single_winner() -> list:
    """Return ten 5-clan victory rows that are all won by slot 0 (ironhold).

    Seeds run 0..9, every row ends at turn 350, and all rows share the same
    roster dict object.
    """
    roster = {
        "0": "ironhold",
        "1": "goldvein",
        "2": "blackhammer",
        "3": "deepforge",
        "4": "runesmith",
    }
    games: list = []
    for seed in range(10):
        games.append((seed, _row("victory", roster, winner_index=0, turn=350)))
    return games
|
||
|
||
|
||
def _ultimate_all_stalemates() -> list:
    """Return ten 5-clan rows that never reach a victory.

    Every row has outcome ``"in_progress"``, ``winner_index`` -1 and turn 500.
    Seeds run 0..9 and all rows share the same roster dict object.
    """
    roster = {
        "0": "ironhold",
        "1": "goldvein",
        "2": "blackhammer",
        "3": "deepforge",
        "4": "runesmith",
    }
    games: list = []
    for seed in range(10):
        games.append((seed, _row("in_progress", roster, winner_index=-1, turn=500)))
    return games
|
||
|
||
|
||
def _ultimate_snap_games() -> list:
    """Return ten 5-clan victory rows that almost all end at turn 25.

    Winners alternate between ironhold (slot 0) and goldvein (slot 1). Only
    the first row (seed 0) ends at turn 500; the other nine end at turn 25.
    All rows share the same roster dict object.
    """
    roster = {
        "0": "ironhold",
        "1": "goldvein",
        "2": "blackhammer",
        "3": "deepforge",
        "4": "runesmith",
    }
    slot_of = {clan: int(slot) for slot, clan in roster.items()}
    champions = ["ironhold", "goldvein"] * 5
    games: list = []
    for seed, champion in enumerate(champions):
        # Seed 0 is the single long game among otherwise turn-25 finishes.
        final_turn = 500 if seed == 0 else 25
        games.append(
            (seed, _row("victory", roster, winner_index=slot_of[champion], turn=final_turn))
        )
    return games
|
||
|
||
|
||
def _ultimate_too_few_samples() -> list:
    """Return only four 5-clan victory rows (seeds 0..3).

    Every row is won by slot 0 (ironhold) at turn 300, and all rows share the
    same roster dict object.
    """
    roster = {
        "0": "ironhold",
        "1": "goldvein",
        "2": "blackhammer",
        "3": "deepforge",
        "4": "runesmith",
    }
    games: list = []
    for seed in range(4):
        games.append((seed, _row("victory", roster, winner_index=0, turn=300)))
    return games
|
||
|
||
|
||
# Names of the checks that failed, in the order they ran.
_fails: list = []
# Number of checks that passed so far.
_passes = 0


def _check(name: str, cond: bool, detail: str = "") -> None:
    """Record and print the result of a single named check.

    Args:
        name: Label printed for the check and stored in ``_fails`` on failure.
        cond: Truthy when the check passed.
        detail: Optional extra text appended to the FAIL line; ignored on
            success and when empty.
    """
    global _passes
    if not cond:
        _fails.append(name)
        suffix = f" — {detail}" if detail else ""
        print(f" FAIL {name}" + suffix)
        return
    _passes += 1
    print(f" PASS {name}")
|
||
|
||
|
||
print("matchup_balance_verdict")

# --- Balanced grid: expected to pass, with every known clan at or below 50%.
v = cr.matchup_balance_verdict(_grid_balanced_1v1())
_check("balanced grid passes", v["pass"], f"reasons={v['reasons']}")
_check("balanced grid reports 40 sample size", v["sample_size"] == 40)
_check(
    "balanced grid lists all 5 clans",
    set(cr.KNOWN_CLAN_IDS) <= set(v["clans"].keys()),
)
_check("balanced grid has no missing clans", v["missing_clans"] == [])
for c in cr.KNOWN_CLAN_IDS:
    rate = v["clans"][c]["win_rate_pct"]
    _check(f"balanced: {c} win_rate <= 50%", rate <= 50, f"{c} saw {rate}%")

# --- Dominant clan: expected to fail with a reason naming ironhold and 50.
v_dom = cr.matchup_balance_verdict(_grid_dominant_clan())
_check("dominant-clan grid fails", not v_dom["pass"])
dom_reasons = v_dom["reasons"]
_check(
    "dominant-clan reason mentions ironhold + 50%",
    any("ironhold" in r and "50" in r for r in dom_reasons),
    f"reasons={dom_reasons}",
)

# --- Undersampled grid: expected to fail; either wording of the reason is accepted.
v_under = cr.matchup_balance_verdict(_grid_undersampled())
_check("undersampled grid fails", not v_under["pass"])
under_reasons = v_under["reasons"]
_check(
    "undersampled reason mentions 'grid incomplete'",
    any("grid incomplete" in r or "appearances" in r for r in under_reasons),
    f"reasons={under_reasons}",
)
|
||
|
||
|
||
print("\nultimate_stress_verdict")

# --- Balanced sweep: expected to pass with ten decisive games.
v_ult = cr.ultimate_stress_verdict(_ultimate_balanced())
_check("balanced ultimate passes", v_ult["pass"], f"reasons={v_ult['reasons']}")
_check("balanced ultimate has 10 samples", v_ult["sample_size"] == 10)
_check("balanced ultimate victory_count == 10", v_ult["victory_count"] == 10)
ult_winners = v_ult["distinct_winners"]
_check(
    "balanced ultimate distinct_winners >= 2",
    len(ult_winners) >= 2,
    f"got {ult_winners}",
)
_check("balanced ultimate median_turn > 0", v_ult["median_turn"] > 0)

# --- One clan wins everything: expected to fail as degenerate.
v_single = cr.ultimate_stress_verdict(_ultimate_degenerate_single_winner())
_check("single-winner sweep fails", not v_single["pass"])
single_reasons = v_single["reasons"]
_check(
    "single-winner reason mentions degenerate",
    any("degenerate" in r for r in single_reasons),
    f"reasons={single_reasons}",
)

# --- No game ever finishes: expected to fail.
v_stale = cr.ultimate_stress_verdict(_ultimate_all_stalemates())
_check("all-stalemate fails", not v_stale["pass"])
stale_reasons = v_stale["reasons"]
_check(
    "all-stalemate reason mentions stalling",
    any("stalling" in r or "decisive" in r for r in stale_reasons),
    f"reasons={stale_reasons}",
)

# --- Games end almost immediately: expected to fail.
v_snap = cr.ultimate_stress_verdict(_ultimate_snap_games())
_check("snap-ending fails", not v_snap["pass"])
snap_reasons = v_snap["reasons"]
snap_keywords = ("snap-ending", "not being used", "median")
_check(
    "snap-ending reason mentions map not being used",
    any(keyword in r for r in snap_reasons for keyword in snap_keywords),
    f"reasons={snap_reasons}",
)

# --- Only four games: expected to fail on sample size.
v_few = cr.ultimate_stress_verdict(_ultimate_too_few_samples())
_check("too-few-samples fails", not v_few["pass"])
few_reasons = v_few["reasons"]
few_keywords = ("SEEDS", "samples", "games")
_check(
    "too-few-samples reason mentions sample size",
    any(keyword in r for r in few_reasons for keyword in few_keywords),
    f"reasons={few_reasons}",
)
|
||
|
||
|
||
# Final summary: list every failed check and exit with status 1, or report
# the pass count and let the script end normally.
print("")
if not _fails:
    print(f"PASS {_passes} / {_passes}")
else:
    failed_count = len(_fails)
    print(f"FAILED ({failed_count} / {_passes + failed_count})")
    for n in _fails:
        print(f" × {n}")
    sys.exit(1)
|