216 lines
7.7 KiB
Python
216 lines
7.7 KiB
Python
|
|
#!/usr/bin/env python3
"""Tests for matchup_balance and ultimate_stress verdicts in checklist-report.py.

Covers the two new gates added 2026-04-17 alongside `tools/matchup-grid.sh`
(C(5,2)=10 1v1 pairings) and `tools/huge-map-5clan.sh` (5-clan huge-map
ultimate AI lookahead stress test). Both functions consume
`list[tuple[int, dict]]` rows exactly like the pre-existing
`personality_win_balance_verdict`, so the fixture style mirrors
`tools/test_personality_winrate.py`.

Run: python3 tools/test_matchup_and_ultimate.py
"""
|
|||
|
|
from __future__ import annotations
|
|||
|
|
|
|||
|
|
import importlib.util as _iu
|
|||
|
|
import sys
|
|||
|
|
from pathlib import Path
|
|||
|
|
|
|||
|
|
# Directory containing this test file; _load() resolves script names against it.
_TOOLS = Path(__file__).parent
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _load(name: str, stem: str):
    """Import ``<stem>.py`` from the tools directory as a module called *name*.

    The module is built straight from its file path, which allows loading
    files whose names are not valid identifiers (e.g. ``checklist-report``).
    The module is executed but is not registered in ``sys.modules``.

    Args:
        name: Module name given to the import spec.
        stem: File name without the ``.py`` suffix, resolved against ``_TOOLS``.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec or loader can be built for the path.
    """
    path = _TOOLS / f"{stem}.py"
    spec = _iu.spec_from_file_location(name, path)
    # spec_from_file_location() may return None and spec.loader may be None;
    # fail with a clear message instead of an AttributeError on None.
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot build an import spec for {path}")
    mod = _iu.module_from_spec(spec)
    spec.loader.exec_module(mod)
    return mod
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Module under test, loaded from the hyphenated file checklist-report.py.
cr = _load("checklist_report", "checklist-report")
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _row(outcome: str, clans: dict, winner_index: int = 1, turn: int = 200) -> dict:
    """Build one synthetic game-result record for the verdict fixtures.

    Args:
        outcome: Value stored under ``"outcome"`` (the fixtures in this file
            use ``"victory"`` and ``"in_progress"``).
        clans: Mapping of player slot (as a string, e.g. ``"1"``) to clan id.
            It is stored as-is under ``"player_clans"`` (not copied).
        winner_index: Slot number of the winner; the fixtures pass ``-1``
            for games without a winner.
        turn: Value stored under ``"turn"``.

    Returns:
        A dict with the keys ``outcome``, ``winner_index``,
        ``winner_personality``, ``player_clans`` and ``turn``.
        ``winner_personality`` is the clan found in *clans* for the winning
        slot, or ``""`` when that slot is not present.
    """
    return {
        "outcome": outcome,
        "winner_index": winner_index,
        "winner_personality": clans.get(str(winner_index), ""),
        "player_clans": clans,
        "turn": turn,
    }
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _grid_balanced_1v1() -> list:
    """Return ``(seed, row)`` tuples for an evenly split 1v1 pairing grid.

    For each of the 10 unordered clan pairs ``(a, b)`` four rows are emitted:
    a turn-180 victory for ``a``, a turn-180 victory for ``b``, and one
    turn-300 ``in_progress`` row for each of them. That gives 40 rows with
    seeds 0..39; every clan appears in 8 rows, 4 of which are victories.
    In every row slot ``"0"`` is the empty string and slot ``"1"`` holds the
    clan the row is about.
    """
    clan_ids = ["ironhold", "goldvein", "blackhammer", "deepforge", "runesmith"]
    rows: list = []
    for i, a in enumerate(clan_ids):
        for b in clan_ids[i + 1:]:
            per_pair = (
                ("victory", a, 1, 180),
                ("victory", b, 1, 180),
                ("in_progress", a, -1, 300),
                ("in_progress", b, -1, 300),
            )
            for outcome, clan, winner_index, turn in per_pair:
                row = _row(outcome, {"0": "", "1": clan}, winner_index=winner_index, turn=turn)
                # Seeds are consecutive from 0, i.e. the current row count.
                rows.append((len(rows), row))
    return rows
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _grid_dominant_clan() -> list:
    """Return ``(seed, row)`` tuples for a grid in which ironhold always wins.

    For each of the four other clans, ten iterations each add a turn-150
    ironhold victory followed by a turn-300 ``in_progress`` row for the
    opponent: 80 rows with seeds 0..79. Ironhold wins all 40 of its rows and
    no other clan wins any.
    """
    rows: list = []
    seed = 0
    for opp in ["goldvein", "blackhammer", "deepforge", "runesmith"]:
        for _ in range(10):
            win = _row("victory", {"0": "", "1": "ironhold"}, winner_index=1, turn=150)
            rows.append((seed, win))
            seed += 1
            stalled = _row("in_progress", {"0": "", "1": opp}, winner_index=-1, turn=300)
            rows.append((seed, stalled))
            seed += 1
    return rows
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _grid_undersampled() -> list:
    """Return ``(seed, row)`` tuples with only two games per clan.

    Each of the five clans gets two turn-200 victories (seeds ``2*i`` and
    ``2*i + 1``), for 10 rows in total.
    """
    clan_ids = ["ironhold", "goldvein", "blackhammer", "deepforge", "runesmith"]
    rows: list = []
    for i, c in enumerate(clan_ids):
        for seed in (i * 2, i * 2 + 1):
            rows.append((seed, _row("victory", {"0": "", "1": c}, winner_index=1, turn=200)))
    return rows
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _ultimate_balanced() -> list:
    """Return ``(seed, row)`` tuples for ten decisive 5-clan games.

    Every row is a turn-350 victory. The winners are spread over four clans
    (ironhold 3, goldvein 3, blackhammer 2, deepforge 2); runesmith plays in
    every game but never wins. Seeds are 0..9.
    """
    wins = ["ironhold", "goldvein", "blackhammer", "deepforge", "ironhold",
            "goldvein", "blackhammer", "ironhold", "deepforge", "goldvein"]
    rows: list = []
    for seed, winner in enumerate(wins):
        # Built inside the loop so each row carries its own player_clans dict.
        clans = {"0": "ironhold", "1": "goldvein", "2": "blackhammer",
                 "3": "deepforge", "4": "runesmith"}
        # Reverse lookup: the slot key whose clan is this game's winner.
        w_slot = next(k for k, v in clans.items() if v == winner)
        rows.append((seed, _row("victory", clans, winner_index=int(w_slot), turn=350)))
    return rows
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _ultimate_degenerate_single_winner() -> list:
    """Return ten turn-350 victories that are all won by slot 0 (ironhold).

    Seeds are 0..9. All rows share the same ``player_clans`` dict object.
    """
    clans = {"0": "ironhold", "1": "goldvein", "2": "blackhammer",
             "3": "deepforge", "4": "runesmith"}
    return [(s, _row("victory", clans, winner_index=0, turn=350)) for s in range(10)]
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _ultimate_all_stalemates() -> list:
    """Return ten ``in_progress`` rows at turn 500 with no winner.

    Seeds are 0..9 and ``winner_index`` is ``-1`` in every row. All rows
    share the same ``player_clans`` dict object.
    """
    clans = {"0": "ironhold", "1": "goldvein", "2": "blackhammer",
             "3": "deepforge", "4": "runesmith"}
    return [(s, _row("in_progress", clans, winner_index=-1, turn=500)) for s in range(10)]
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _ultimate_snap_games() -> list:
    """Return ten victories that almost all end at turn 25.

    Winners alternate between ironhold and goldvein (seeds 0..9). After the
    rows are built, the first row's turn is overwritten with 500, so nine
    games end at turn 25 and one at turn 500. All rows share the same
    ``player_clans`` dict object.
    """
    clans = {"0": "ironhold", "1": "goldvein", "2": "blackhammer",
             "3": "deepforge", "4": "runesmith"}
    wins = ["ironhold", "goldvein"] * 5
    rows: list = []
    for seed, winner in enumerate(wins):
        w_slot = next(k for k, v in clans.items() if v == winner)
        rows.append((seed, _row("victory", clans, winner_index=int(w_slot), turn=25)))
    # One long outlier; the median turn of the set is still 25.
    # NOTE(review): presumably this proves the verdict is not fooled by a
    # single long game -- confirm against ultimate_stress_verdict.
    rows[0][1]["turn"] = 500
    return rows
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _ultimate_too_few_samples() -> list:
    """Return only four turn-300 victories, all won by slot 0 (ironhold).

    Seeds are 0..3. All rows share the same ``player_clans`` dict object.
    """
    clans = {"0": "ironhold", "1": "goldvein", "2": "blackhammer",
             "3": "deepforge", "4": "runesmith"}
    return [(s, _row("victory", clans, winner_index=0, turn=300)) for s in range(4)]
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Names of the checks that failed, in the order _check() recorded them.
_fails: list = []
|
|||
|
|
# Number of checks that passed; incremented by _check().
_passes: int = 0
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _check(name: str, cond: bool, detail: str = "") -> None:
    """Record and print the outcome of one named check.

    Args:
        name: Label printed for the check and stored in ``_fails`` on failure.
        cond: Truthy when the check passed.
        detail: Optional text appended to the FAIL line; ignored on success.

    Side effects:
        Increments the module-level ``_passes`` counter on success, appends
        *name* to the module-level ``_fails`` list on failure, and prints one
        line either way. Never raises on a failed check, so the remaining
        checks still run.
    """
    global _passes
    if cond:
        _passes += 1
        print(f" PASS {name}")
    else:
        _fails.append(name)
        print(f" FAIL {name}" + (f" — {detail}" if detail else ""))
|
|||
|
|
|
|||
|
|
|
|||
|
|
# --- matchup_balance_verdict ------------------------------------------------
print("matchup_balance_verdict")

# Balanced grid: 40 rows, every known clan present.
v = cr.matchup_balance_verdict(_grid_balanced_1v1())
_check("balanced grid passes", v["pass"], f"reasons={v['reasons']}")
_check("balanced grid reports 40 sample size", v["sample_size"] == 40)
_check("balanced grid lists all 5 clans", set(v["clans"].keys()) >= set(cr.KNOWN_CLAN_IDS))
_check("balanced grid has no missing clans", v["missing_clans"] == [])
for c in cr.KNOWN_CLAN_IDS:
    # A clan missing from the verdict is recorded as a FAIL here rather than
    # raising KeyError, which would abort the run before the summary prints.
    stats = v["clans"].get(c)
    rate = None if stats is None else stats["win_rate_pct"]
    _check(
        f"balanced: {c} win_rate <= 50%",
        rate is not None and rate <= 50,
        f"{c} saw {rate}%" if rate is not None else f"{c} missing from verdict",
    )

# Dominant clan: ironhold wins every one of its games.
v_dom = cr.matchup_balance_verdict(_grid_dominant_clan())
_check("dominant-clan grid fails", not v_dom["pass"])
_check(
    "dominant-clan reason mentions ironhold + 50%",
    any("ironhold" in r and "50" in r for r in v_dom["reasons"]),
    f"reasons={v_dom['reasons']}",
)

# Undersampled grid: only two games per clan. The reason check accepts
# either the phrase "grid incomplete" or the word "appearances".
v_under = cr.matchup_balance_verdict(_grid_undersampled())
_check("undersampled grid fails", not v_under["pass"])
_check(
    "undersampled reason mentions 'grid incomplete'",
    any("grid incomplete" in r or "appearances" in r for r in v_under["reasons"]),
    f"reasons={v_under['reasons']}",
)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# --- ultimate_stress_verdict ------------------------------------------------
print("\nultimate_stress_verdict")

# Ten decisive games with four distinct winners.
v_ult = cr.ultimate_stress_verdict(_ultimate_balanced())
_check("balanced ultimate passes", v_ult["pass"], f"reasons={v_ult['reasons']}")
_check("balanced ultimate has 10 samples", v_ult["sample_size"] == 10)
_check("balanced ultimate victory_count == 10", v_ult["victory_count"] == 10)
_check(
    "balanced ultimate distinct_winners >= 2",
    len(v_ult["distinct_winners"]) >= 2,
    f"got {v_ult['distinct_winners']}",
)
_check("balanced ultimate median_turn > 0", v_ult["median_turn"] > 0)

# The same clan wins every game.
v_single = cr.ultimate_stress_verdict(_ultimate_degenerate_single_winner())
_check("single-winner sweep fails", not v_single["pass"])
_check(
    "single-winner reason mentions degenerate",
    any("degenerate" in r for r in v_single["reasons"]),
    f"reasons={v_single['reasons']}",
)

# No game reaches a victory. The reason check accepts "stalling" or "decisive".
v_stale = cr.ultimate_stress_verdict(_ultimate_all_stalemates())
_check("all-stalemate fails", not v_stale["pass"])
_check(
    "all-stalemate reason mentions stalling",
    any("stalling" in r or "decisive" in r for r in v_stale["reasons"]),
    f"reasons={v_stale['reasons']}",
)

# Nine of ten games end at turn 25. The reason check accepts any of
# "snap-ending", "not being used" or "median".
v_snap = cr.ultimate_stress_verdict(_ultimate_snap_games())
_check("snap-ending fails", not v_snap["pass"])
_check(
    "snap-ending reason mentions map not being used",
    any("snap-ending" in r or "not being used" in r or "median" in r
        for r in v_snap["reasons"]),
    f"reasons={v_snap['reasons']}",
)

# Only four games supplied. The reason check accepts "SEEDS", "samples"
# or "games".
v_few = cr.ultimate_stress_verdict(_ultimate_too_few_samples())
_check("too-few-samples fails", not v_few["pass"])
_check(
    "too-few-samples reason mentions sample size",
    any("SEEDS" in r or "samples" in r or "games" in r for r in v_few["reasons"]),
    f"reasons={v_few['reasons']}",
)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# --- summary ----------------------------------------------------------------
print("")
if _fails:
    # List every failed check, then exit with status 1.
    print(f"FAILED ({len(_fails)} / {_passes + len(_fails)})")
    for n in _fails:
        print(f" × {n}")
    sys.exit(1)
# Reached only when no check failed, so passed == total.
print(f"PASS {_passes} / {_passes}")
|