377 lines
13 KiB
Python
377 lines
13 KiB
Python
#!/usr/bin/env python3
|
|
"""Tests for personality win-rate tracking in autoplay-report and checklist-report."""
|
|
from __future__ import annotations
|
|
|
|
import sys
|
|
import io
|
|
import importlib.util as _iu
|
|
from pathlib import Path
|
|
|
|
# Directory containing this test file; _load() resolves the report scripts relative to it.
_TOOLS: Path = Path(__file__).parent
|
|
|
|
|
|
def _load(name: str, stem: str):
    """Import ``<stem>.py`` from the tools directory under the module name *name*.

    The report scripts use hyphenated filenames, which a plain ``import``
    statement cannot express, so they are loaded from their file path.

    Args:
        name: Module name given to the loaded module.
        stem: Filename without the ``.py`` suffix, looked up under ``_TOOLS``.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec or loader can be built for the path.
    """
    path = _TOOLS / f"{stem}.py"
    spec = _iu.spec_from_file_location(name, path)
    # Both values are Optional in the importlib API; fail loudly instead of
    # hitting an AttributeError on None further down.
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot build an import spec for {name!r} at {path}")
    mod = _iu.module_from_spec(spec)
    spec.loader.exec_module(mod)
    return mod
|
|
|
|
|
|
# Module loaded from autoplay-report.py, bound to a short alias for the tests below.
ar = _load("autoplay_report", "autoplay-report")
|
|
# Module loaded from checklist-report.py, bound to a short alias for the tests below.
cr = _load("checklist_report", "checklist-report")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fixtures
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _row(
    outcome: str,
    winner_personality: str,
    winner_index: int = 1,
    **overrides: object,
) -> dict:
    """Build one synthetic autoplay-report row describing a single game.

    Args:
        outcome: Outcome string stored under ``"outcome"``; the tests use
            ``"victory"`` and ``"max_turns"``.
        winner_personality: Value stored under ``"winner_personality"``; may
            be an empty string.
        winner_index: Value stored under ``"winner_index"``.
        **overrides: Extra or replacement columns, applied after the
            defaults so a test can vary any fixed placeholder value.

    Returns:
        A fresh dict; ``"victory_type"`` is ``"score"`` for a victory and
        ``""`` otherwise, and the remaining columns are fixed placeholders.
    """
    row = {
        "outcome": outcome,
        "winner_index": winner_index,
        "winner_personality": winner_personality,
        "turns_played": 200,
        "victory_type": "score" if outcome == "victory" else "",
        "wall_clock_sec": 10.0,
        "event_count": 50,
        "invariant_violations": 0,
        "p0_pop_peak": 20,
        "p0_gold_peak": 100,
        "agg_total_combats": 30,
    }
    row.update(overrides)
    return row
|
|
|
|
|
|
def _result(
    outcome: str,
    winner_personality: str,
    winner_index: int = 1,
) -> tuple[int, dict]:
    """Build one synthetic checklist-report result with no per-player clan map.

    Args:
        outcome: Outcome string stored under ``"outcome"``.
        winner_personality: Value stored under ``"winner_personality"``; may
            be an empty string.
        winner_index: Value stored under ``"winner_index"``. Defaults to 1,
            the value this fixture previously hard-coded.

    Returns:
        A ``(1, data)`` tuple where ``data`` is a fresh dict with an empty
        ``"player_clans"`` mapping and fixed placeholder metrics.
        NOTE(review): the leading ``1`` is presumably a game/seed id -
        confirm against checklist-report.
    """
    data = {
        "outcome": outcome,
        "winner_personality": winner_personality,
        "winner_index": winner_index,
        "player_clans": {},
        "turns": 200,
        "pop_peak": 20,
        "p0_tiles": 25,
        "p0_techs": 22,
        "combats": 120,
        "happy_distinct": 4,
        "imp_events": 6,
        "loot_events": 2,
        "gate_events": 1,
        "both_p100": True,
        "invariants": 0,
        "script_errors": 0,
    }
    return (1, data)
|
|
|
|
|
|
def _result_with_clans(
    outcome: str, winner_index: int, player_clans: dict[str, str]
) -> tuple[int, dict]:
    """Build a synthetic checklist-report result that carries a player-to-clan map.

    The winning clan is looked up in *player_clans* under ``str(winner_index)``
    when *outcome* is ``"victory"``; otherwise the winner personality is ``""``.
    All placeholder metrics come from ``_result`` so the two fixtures cannot
    drift apart.

    Args:
        outcome: Outcome string stored under ``"outcome"``.
        winner_index: Value stored under ``"winner_index"``.
        player_clans: Mapping of player index (as a string) to clan id; a
            copy is stored so the caller's dict is never shared.

    Returns:
        The ``(id, data)`` tuple produced by ``_result`` with
        ``"winner_index"`` and ``"player_clans"`` overridden.
    """
    winner_clan = ""
    if outcome == "victory":
        winner_clan = player_clans.get(str(winner_index), "")
    game_id, data = _result(outcome, winner_clan)
    data["winner_index"] = winner_index
    data["player_clans"] = dict(player_clans)
    return (game_id, data)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# autoplay-report: build_personality_win_table
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_win_table_empty_rows():
    """An empty row list produces an empty win table."""
    win_table = ar.build_personality_win_table([])
    assert win_table == {}
|
|
|
|
|
|
def test_win_table_skips_empty_personality():
    """Rows with a blank winner personality leave the win table empty."""
    blank_rows = [_row(outcome, "") for outcome in ("victory", "max_turns")]
    win_table = ar.build_personality_win_table(blank_rows)
    assert win_table == {}
|
|
|
|
|
|
def test_win_table_single_clan_all_wins():
    """Two victories for one clan: 2 wins, 2 appearances, 0 losses."""
    rows = [_row("victory", "ironhold") for _ in range(2)]
    entry = ar.build_personality_win_table(rows)["ironhold"]
    assert entry["wins"] == 2
    assert entry["appearances"] == 2
    assert entry["losses"] == 0
|
|
|
|
|
|
def test_win_table_single_clan_no_wins():
    """Two max_turns rows for one clan: 0 wins, 2 appearances, 2 losses."""
    rows = [_row("max_turns", "blackhammer") for _ in range(2)]
    entry = ar.build_personality_win_table(rows)["blackhammer"]
    assert entry["wins"] == 0
    assert entry["appearances"] == 2
    assert entry["losses"] == 2
|
|
|
|
|
|
def test_win_table_multiple_clans():
    """Wins and appearances are tallied separately for each clan."""
    games = (
        ("victory", "ironhold"),
        ("victory", "goldvein"),
        ("max_turns", "ironhold"),
        ("victory", "runesmith"),
    )
    table = ar.build_personality_win_table(
        [_row(outcome, clan) for outcome, clan in games]
    )
    # clan -> (expected wins, expected appearances)
    expected = {
        "ironhold": (1, 2),
        "goldvein": (1, 1),
        "runesmith": (1, 1),
    }
    for clan, (wins, appearances) in expected.items():
        assert table[clan]["wins"] == wins
        assert table[clan]["appearances"] == appearances
|
|
|
|
|
|
def test_win_table_losses_computed():
    """One victory plus one max_turns row gives the clan exactly one loss."""
    rows = [_row(outcome, "deepforge") for outcome in ("victory", "max_turns")]
    entry = ar.build_personality_win_table(rows)["deepforge"]
    assert entry["losses"] == 1
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# autoplay-report: print_personality_summary
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_print_personality_summary_no_data():
    """With no rows the summary output mentions "no data"."""
    buf = io.StringIO()
    ar.print_personality_summary([], out=buf)
    printed = buf.getvalue()
    assert "no data" in printed
|
|
|
|
|
|
def test_print_personality_summary_imbalanced_flag():
    """A clan winning 3 of its 4 rows is printed with the IMBALANCED marker."""
    # Build each row separately: `[row] * 3` repeats one dict object three
    # times, so an in-place change by the code under test would leak across rows.
    rows = [_row("victory", "blackhammer") for _ in range(3)]
    rows.append(_row("max_turns", "blackhammer"))
    out = io.StringIO()
    ar.print_personality_summary(rows, out=out)
    text = out.getvalue()
    assert "IMBALANCED" in text
    assert "blackhammer" in text
|
|
|
|
|
|
def test_print_personality_summary_balanced_no_flag():
    """A clan winning 1 of its 2 rows is not marked IMBALANCED."""
    rows = [_row(outcome, "runesmith") for outcome in ("victory", "max_turns")]
    buf = io.StringIO()
    ar.print_personality_summary(rows, out=buf)
    printed = buf.getvalue()
    assert "IMBALANCED" not in printed
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# checklist-report: personality_win_balance
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_pwb_no_data():
    """An empty result list passes and the detail text mentions "no data"."""
    passed, message = cr.personality_win_balance([])
    assert passed is True
    assert "no data" in message
|
|
|
|
|
|
def test_pwb_empty_personality_ignored():
    """Results with a blank winner personality are treated as "no data"."""
    blank_results = [_result(outcome, "") for outcome in ("victory", "max_turns")]
    passed, message = cr.personality_win_balance(blank_results)
    assert passed is True
    assert "no data" in message
|
|
|
|
|
|
def test_pwb_all_wins_imbalanced():
    """A clan that wins all three of its games fails the check and is named."""
    # One fixture per game: `[result] * 3` would repeat a single tuple (and
    # the dict inside it), so the three "games" would share mutable state.
    results = [_result("victory", "ironhold") for _ in range(3)]
    ok, detail = cr.personality_win_balance(results)
    assert ok is False
    assert "ironhold" in detail
|
|
|
|
|
|
def test_pwb_exactly_50_percent_passes():
    """A clan winning exactly 1 of 2 games still passes the balance check."""
    results = [_result(outcome, "goldvein") for outcome in ("victory", "max_turns")]
    passed, _message = cr.personality_win_balance(results)
    assert passed is True
|
|
|
|
|
|
def test_pwb_mixed_clans_balanced():
    """Two clans each winning 1 of 2 games pass, and both appear in the detail."""
    games = (
        ("victory", "ironhold"),
        ("max_turns", "ironhold"),
        ("victory", "runesmith"),
        ("max_turns", "runesmith"),
    )
    results = [_result(outcome, clan) for outcome, clan in games]
    passed, message = cr.personality_win_balance(results)
    assert passed is True
    for clan in ("ironhold", "runesmith"):
        assert clan in message
|
|
|
|
|
|
def test_pwb_one_clan_over_threshold():
    """One clan winning 2 of 3 games fails the check even if another is balanced."""
    games = (
        ("victory", "blackhammer"),
        ("victory", "blackhammer"),
        ("max_turns", "blackhammer"),
        ("victory", "runesmith"),
        ("max_turns", "runesmith"),
    )
    results = [_result(outcome, clan) for outcome, clan in games]
    passed, message = cr.personality_win_balance(results)
    assert passed is False
    assert "blackhammer" in message
|
|
|
|
|
|
def test_pwb_detail_contains_win_counts():
    """The detail text includes the wins/appearances fraction ("1/2")."""
    results = [_result(outcome, "deepforge") for outcome in ("victory", "max_turns")]
    message = cr.personality_win_balance(results)[1]
    assert "1/2" in message
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# autoplay-report: build_per_clan_stats
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _row_with_clans(
    outcome: str,
    winner_index: int,
    player_clans: dict,
    snapshots: dict | None = None,
    **extra,
) -> dict:
    """Build an autoplay-report row that also carries per-player clan data.

    Starts from ``_row`` and attaches the private ``"_player_clans"`` and
    ``"_snapshots"`` entries. The winner personality is the clan found under
    ``str(winner_index)`` when *outcome* is ``"victory"``, else ``""``.

    Args:
        outcome: Outcome string for the game.
        winner_index: Index of the winning player.
        player_clans: Mapping of player index (as a string) to clan id.
        snapshots: Optional snapshot mapping; ``None`` or an empty mapping
            is stored as ``{}``.
        **extra: Additional or replacement columns, applied last.

    Returns:
        A fresh row dict holding copies of *player_clans* and *snapshots*.
    """
    winner_clan = player_clans.get(str(winner_index), "") if outcome == "victory" else ""
    # _row already records winner_index, so it is not assigned again here.
    base = _row(outcome, winner_clan, winner_index=winner_index)
    base["_player_clans"] = dict(player_clans)
    # Shallow-copy like player_clans so the row never shares the caller's dict.
    base["_snapshots"] = dict(snapshots) if snapshots else {}
    base.update(extra)
    return base
|
|
|
|
|
|
def test_per_clan_stats_uses_player_clans():
    """Appearances come from the per-row clan map, not only from the winner."""
    # Two ironhold-vs-goldvein games, one win each: every clan ends up with
    # 2 appearances and 1 win.
    matchup = {"0": "ironhold", "1": "goldvein"}
    rows = [_row_with_clans("victory", winner, matchup) for winner in (0, 1)]
    stats = ar.build_per_clan_stats(rows)
    for clan in ("ironhold", "goldvein"):
        assert stats[clan]["appearances"] == 2
        assert stats[clan]["wins"] == 1
    # A clan that never played is still listed, with zero appearances.
    idle = stats["deepforge"]
    assert idle["appearances"] == 0
    assert idle["win_rate"] is None
|
|
|
|
|
|
def test_per_clan_stats_fallback_winner_only():
    """Rows without a clan map are counted through winner_personality alone."""
    rows = [
        _row("victory", "blackhammer"),
        _row("max_turns", ""),
    ]
    entry = ar.build_per_clan_stats(rows)["blackhammer"]
    assert entry["appearances"] == 1
    assert entry["wins"] == 1
|
|
|
|
|
|
def test_per_clan_stats_snapshot_medians():
    """Per-clan medians are read from the turn-100 and turn-200 snapshots."""
    turn_100 = {
        "player_stats": {
            "0": {"mil": 5, "gold": 100},
            "1": {"mil": 8, "gold": 200},
        }
    }
    turn_200 = {
        "player_stats": {
            "0": {"mil": 15, "gold": 500},
            "1": {"mil": 20, "gold": 600},
        }
    }
    snapshots = {100: turn_100, 200: turn_200}
    row = _row_with_clans(
        "victory", 0, {"0": "ironhold", "1": "goldvein"}, snapshots=snapshots
    )
    stats = ar.build_per_clan_stats([row])
    ironhold = stats["ironhold"]
    assert ironhold["median_unit_count_100"] == 5.0
    assert ironhold["median_gold_200"] == 500.0
    assert stats["goldvein"]["median_unit_count_200"] == 20.0
    # No turn-300 snapshot was supplied, so that median is None.
    assert ironhold["median_unit_count_300"] is None
|
|
|
|
|
|
def test_render_per_clan_table_runs():
    """Rendering writes a header plus both clan names and returns the stats."""
    buf = io.StringIO()
    row = _row_with_clans("victory", 1, {"0": "ironhold", "1": "goldvein"})
    stats = ar.render_per_clan_table([row], out=buf)
    rendered = buf.getvalue()
    for fragment in ("per-clan stats:", "ironhold", "goldvein"):
        assert fragment in rendered
    assert stats["goldvein"]["wins"] == 1
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# checklist-report: personality_win_balance_verdict (CI-consumable)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_verdict_no_data_passes():
    """With no results the verdict passes, has no reasons, and lists every known clan."""
    verdict = cr.personality_win_balance_verdict([])
    assert verdict["pass"] is True
    assert verdict["reasons"] == []
    assert verdict["sample_size"] == 0
    listed_clans = set(verdict["clans"].keys())
    assert listed_clans == set(cr.KNOWN_CLAN_IDS)
|
|
|
|
|
|
def test_verdict_fails_on_win_rate_over_50():
    """A clan winning 3 of 3 games fails with a reason quoting "100.0%"."""
    # One fixture per game: `[result] * 3` would repeat a single tuple (and
    # the dict inside it), so the three "games" would share mutable state.
    results = [_result("victory", "ironhold") for _ in range(3)]
    v = cr.personality_win_balance_verdict(results)
    assert v["pass"] is False
    assert any("ironhold" in r and "100.0%" in r for r in v["reasons"])
|
|
|
|
|
|
def test_verdict_passes_balanced_with_clans():
    """Two clans splitting two games evenly produce a passing verdict."""
    # Both games field ironhold and goldvein; each clan wins once.
    matchup = {"0": "ironhold", "1": "goldvein"}
    results = [_result_with_clans("victory", winner, matchup) for winner in (0, 1)]
    verdict = cr.personality_win_balance_verdict(results)
    assert verdict["pass"] is True
    ironhold = verdict["clans"]["ironhold"]
    assert ironhold["appearances"] == 2
    assert ironhold["wins"] == 1
|
|
|
|
|
|
def test_verdict_fails_on_zero_wins_with_many_apps():
    """A clan with five appearances and no wins is reported with a "0 wins" reason."""
    # ironhold beats deepforge in all five games.
    results = [
        _result_with_clans("victory", 0, {"0": "ironhold", "1": "deepforge"})
        for _ in range(5)
    ]
    verdict = cr.personality_win_balance_verdict(results)
    assert verdict["pass"] is False
    # Only the deepforge zero-wins reason is checked here.
    zero_win_hits = (
        "deepforge" in reason and "0 wins" in reason
        for reason in verdict["reasons"]
    )
    assert any(zero_win_hits)
|
|
|
|
|
|
def test_verdict_tolerates_zero_wins_under_threshold():
    """Four winless appearances do not produce a "0 wins" reason for that clan."""
    # One fixture per game: `[result] * 4` would repeat a single tuple (and
    # the dict inside it), so the four "games" would share mutable state.
    results = [
        _result_with_clans("victory", 0, {"0": "ironhold", "1": "deepforge"})
        for _ in range(4)
    ]
    v = cr.personality_win_balance_verdict(results)
    deepforge_reasons = [r for r in v["reasons"] if "deepforge" in r and "0 wins" in r]
    assert deepforge_reasons == []
|
|
|
|
|
|
def test_verdict_missing_clans_reported():
    """Clans absent from every game are listed under "missing_clans"."""
    # Only ironhold and goldvein take part in the single game.
    game = _result_with_clans("victory", 0, {"0": "ironhold", "1": "goldvein"})
    verdict = cr.personality_win_balance_verdict([game])
    absent = set(verdict["missing_clans"])
    for clan in ("deepforge", "blackhammer", "runesmith"):
        assert clan in absent
    assert "ironhold" not in absent
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Runner
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _run_tests(namespace: dict) -> int:
    """Run every callable named ``test_*`` in *namespace* and report the results.

    Tests run in name order. One PASS or FAIL line is printed per test,
    followed by a summary line.

    Args:
        namespace: Mapping of names to objects, typically ``globals()``.

    Returns:
        The number of tests that raised an exception.
    """
    tests = [
        namespace[name]
        for name in sorted(namespace)
        if name.startswith("test_") and callable(namespace[name])
    ]
    passed = failed = 0
    for test in tests:
        try:
            test()
        except Exception as exc:
            # A bare `assert` fails with an empty message, so the exception
            # type is printed too; otherwise the FAIL line says nothing.
            print(f" FAIL {test.__name__}: {type(exc).__name__}: {exc}")
            failed += 1
        else:
            print(f" PASS {test.__name__}")
            passed += 1
    print(f"\n{passed} passed, {failed} failed")
    return failed


if __name__ == "__main__":
    sys.exit(0 if _run_tests(globals()) == 0 else 1)
|