magicciv/tools/test_personality_winrate.py
Natalie 0dc66eb0d4 feat(@projects/@magic-civilization): complete mcts and wonder tracking milestones
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-04-17 01:50:29 -07:00

377 lines
13 KiB
Python

#!/usr/bin/env python3
"""Tests for personality win-rate tracking in autoplay-report and checklist-report."""
from __future__ import annotations
import sys
import io
import importlib.util as _iu
from pathlib import Path
# Directory that contains this test file; _load() resolves tool scripts relative to it.
_TOOLS = Path(__file__).parent
def _load(name: str, stem: str):
    """Import a tool script by file path and return the executed module.

    The scripts loaded here have hyphenated file names (for example
    ``autoplay-report.py``), which a plain ``import`` statement cannot
    express, so the module is built from an explicit file location.

    Args:
        name: Module name given to the loaded module.
        stem: Script file name without the ``.py`` suffix, looked up in the
            directory held by ``_TOOLS``.

    Returns:
        The module object after its code has been executed.

    Raises:
        ImportError: If no import spec or loader can be built for the path.
    """
    path = _TOOLS / f"{stem}.py"
    spec = _iu.spec_from_file_location(name, path)
    # Fail with a message naming the path instead of an AttributeError on None.
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot load module {name!r} from {path}")
    mod = _iu.module_from_spec(spec)
    spec.loader.exec_module(mod)
    return mod
# Modules under test, loaded by path from their hyphenated script files.
ar = _load("autoplay_report", "autoplay-report")
cr = _load("checklist_report", "checklist-report")
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
def _row(outcome: str, winner_personality: str, winner_index: int = 1) -> dict:
return {
"outcome": outcome,
"winner_index": winner_index,
"winner_personality": winner_personality,
"turns_played": 200,
"victory_type": "score" if outcome == "victory" else "",
"wall_clock_sec": 10.0,
"event_count": 50,
"invariant_violations": 0,
"p0_pop_peak": 20,
"p0_gold_peak": 100,
"agg_total_combats": 30,
}
def _result(outcome: str, winner_personality: str) -> tuple[int, dict]:
return (1, {
"outcome": outcome,
"winner_personality": winner_personality,
"winner_index": 1,
"player_clans": {},
"turns": 200,
"pop_peak": 20,
"p0_tiles": 25,
"p0_techs": 22,
"combats": 120,
"happy_distinct": 4,
"imp_events": 6,
"loot_events": 2,
"gate_events": 1,
"both_p100": True,
"invariants": 0,
"script_errors": 0,
})
def _result_with_clans(
outcome: str, winner_index: int, player_clans: dict[str, str]
) -> tuple[int, dict]:
winner_clan = player_clans.get(str(winner_index), "") if outcome == "victory" else ""
return (1, {
"outcome": outcome,
"winner_personality": winner_clan,
"winner_index": winner_index,
"player_clans": dict(player_clans),
"turns": 200, "pop_peak": 20,
"p0_tiles": 25, "p0_techs": 22, "combats": 120,
"happy_distinct": 4, "imp_events": 6, "loot_events": 2, "gate_events": 1,
"both_p100": True, "invariants": 0, "script_errors": 0,
})
# ---------------------------------------------------------------------------
# autoplay-report: build_personality_win_table
# ---------------------------------------------------------------------------
def test_win_table_empty_rows():
    """An empty row list is expected to give an empty win table."""
    table = ar.build_personality_win_table([])
    assert table == {}
def test_win_table_skips_empty_personality():
    """Rows with a blank winner_personality are expected to add no table entries."""
    blank_rows = [_row(outcome, "") for outcome in ("victory", "max_turns")]
    table = ar.build_personality_win_table(blank_rows)
    assert table == {}
def test_win_table_single_clan_all_wins():
    """Two victory rows for one clan: 2 wins, 2 appearances, 0 losses."""
    rows = [_row("victory", "ironhold") for _ in range(2)]
    entry = ar.build_personality_win_table(rows)["ironhold"]
    assert entry["wins"] == 2
    assert entry["appearances"] == 2
    assert entry["losses"] == 0
def test_win_table_single_clan_no_wins():
    """Two max_turns rows for one clan: 0 wins, 2 appearances, 2 losses."""
    rows = [_row("max_turns", "blackhammer") for _ in range(2)]
    entry = ar.build_personality_win_table(rows)["blackhammer"]
    assert entry["wins"] == 0
    assert entry["appearances"] == 2
    assert entry["losses"] == 2
def test_win_table_multiple_clans():
    """Wins and appearances are expected to be tallied separately per clan."""
    games = (
        ("victory", "ironhold"),
        ("victory", "goldvein"),
        ("max_turns", "ironhold"),
        ("victory", "runesmith"),
    )
    rows = [_row(outcome, clan) for outcome, clan in games]
    table = ar.build_personality_win_table(rows)

    # clan -> (expected wins, expected appearances)
    expected = {
        "ironhold": (1, 2),
        "goldvein": (1, 1),
        "runesmith": (1, 1),
    }
    for clan, (wins, appearances) in expected.items():
        assert table[clan]["wins"] == wins
        assert table[clan]["appearances"] == appearances
def test_win_table_losses_computed():
    """One victory plus one max_turns row for a clan is expected to give one loss."""
    rows = [_row(outcome, "deepforge") for outcome in ("victory", "max_turns")]
    entry = ar.build_personality_win_table(rows)["deepforge"]
    assert entry["losses"] == 1
# ---------------------------------------------------------------------------
# autoplay-report: print_personality_summary
# ---------------------------------------------------------------------------
def test_print_personality_summary_no_data():
    """With no rows the printed summary is expected to mention "no data"."""
    buffer = io.StringIO()
    ar.print_personality_summary([], out=buffer)
    text = buffer.getvalue()
    assert "no data" in text
def test_print_personality_summary_imbalanced_flag():
    """Three wins in four rows for one clan should print the IMBALANCED flag."""
    victories = [_row("victory", "blackhammer")] * 3
    timeouts = [_row("max_turns", "blackhammer")]
    rows = victories + timeouts

    buffer = io.StringIO()
    ar.print_personality_summary(rows, out=buffer)
    text = buffer.getvalue()
    assert "IMBALANCED" in text
    assert "blackhammer" in text
def test_print_personality_summary_balanced_no_flag():
    """One win in two rows should not print the IMBALANCED flag."""
    rows = [_row(outcome, "runesmith") for outcome in ("victory", "max_turns")]
    buffer = io.StringIO()
    ar.print_personality_summary(rows, out=buffer)
    text = buffer.getvalue()
    assert "IMBALANCED" not in text
# ---------------------------------------------------------------------------
# checklist-report: personality_win_balance
# ---------------------------------------------------------------------------
def test_pwb_no_data():
    """An empty result list is expected to pass with a "no data" detail."""
    passed, message = cr.personality_win_balance([])
    assert passed is True
    assert "no data" in message
def test_pwb_empty_personality_ignored():
    """Results with a blank winner_personality are expected to count as no data."""
    blank_results = [_result(outcome, "") for outcome in ("victory", "max_turns")]
    passed, message = cr.personality_win_balance(blank_results)
    assert passed is True
    assert "no data" in message
def test_pwb_all_wins_imbalanced():
    """A clan that wins every game is expected to fail the check and be named."""
    one_win = _result("victory", "ironhold")
    results = [one_win] * 3
    passed, message = cr.personality_win_balance(results)
    assert passed is False
    assert "ironhold" in message
def test_pwb_exactly_50_percent_passes():
    """One win in two games (exactly 50%) is expected to pass."""
    results = [_result(outcome, "goldvein") for outcome in ("victory", "max_turns")]
    passed, _ = cr.personality_win_balance(results)
    assert passed is True
def test_pwb_mixed_clans_balanced():
    """Two clans at one win in two games each should pass and both be named."""
    results = [
        _result(outcome, clan)
        for clan in ("ironhold", "runesmith")
        for outcome in ("victory", "max_turns")
    ]
    passed, message = cr.personality_win_balance(results)
    assert passed is True
    for clan in ("ironhold", "runesmith"):
        assert clan in message
def test_pwb_one_clan_over_threshold():
    """One clan at 2 wins in 3 games should fail even if another sits at 1 in 2."""
    games = (
        ("victory", "blackhammer"),
        ("victory", "blackhammer"),
        ("max_turns", "blackhammer"),
        ("victory", "runesmith"),
        ("max_turns", "runesmith"),
    )
    results = [_result(outcome, clan) for outcome, clan in games]
    passed, message = cr.personality_win_balance(results)
    assert passed is False
    assert "blackhammer" in message
def test_pwb_detail_contains_win_counts():
    """The detail text is expected to show wins over games, here "1/2"."""
    results = [_result(outcome, "deepforge") for outcome in ("victory", "max_turns")]
    verdict = cr.personality_win_balance(results)
    _, message = verdict
    assert "1/2" in message
# ---------------------------------------------------------------------------
# autoplay-report: build_per_clan_stats
# ---------------------------------------------------------------------------
def _row_with_clans(
    outcome: str,
    winner_index: int,
    player_clans: dict,
    snapshots: dict | None = None,
    **extra,
) -> dict:
    """Build an autoplay-report row that also carries clan and snapshot data.

    Starts from ``_row`` and adds ``_player_clans`` (a copy of
    ``player_clans``) and ``_snapshots`` (``snapshots`` or an empty dict).
    The winner's clan is looked up under ``str(winner_index)`` for a
    ``"victory"`` outcome and is blank otherwise. Any ``extra`` keyword
    arguments are applied last and so override the generated fields.
    """
    if outcome == "victory":
        winner_clan = player_clans.get(str(winner_index), "")
    else:
        winner_clan = ""

    row = _row(outcome, winner_clan, winner_index=winner_index)
    row.update(
        {
            "_player_clans": dict(player_clans),
            "_snapshots": snapshots or {},
            "winner_index": winner_index,
        }
    )
    row.update(extra)
    return row
def test_per_clan_stats_uses_player_clans():
    """Appearances are expected to come from player_clans, not just the winner."""
    # Two games of ironhold vs goldvein; each clan wins once, so each should
    # end up with 2 appearances and 1 win.
    matchup = {"0": "ironhold", "1": "goldvein"}
    rows = [_row_with_clans("victory", winner, matchup) for winner in (0, 1)]
    stats = ar.build_per_clan_stats(rows)

    for clan in ("ironhold", "goldvein"):
        assert stats[clan]["appearances"] == 2
        assert stats[clan]["wins"] == 1

    # A clan that never played is still listed, with zero appearances.
    idle = stats["deepforge"]
    assert idle["appearances"] == 0
    assert idle["win_rate"] is None
def test_per_clan_stats_fallback_winner_only():
    """Without player_clans on the row, winner_personality is expected to be used."""
    winning_row = _row("victory", "blackhammer")
    blank_row = _row("max_turns", "")
    stats = ar.build_per_clan_stats([winning_row, blank_row])

    entry = stats["blackhammer"]
    assert entry["appearances"] == 1
    assert entry["wins"] == 1
def test_per_clan_stats_snapshot_medians():
    """Per-turn snapshot values are expected to surface as per-clan medians."""
    turn_100 = {"0": {"mil": 5, "gold": 100}, "1": {"mil": 8, "gold": 200}}
    turn_200 = {"0": {"mil": 15, "gold": 500}, "1": {"mil": 20, "gold": 600}}
    snapshots = {
        100: {"player_stats": turn_100},
        200: {"player_stats": turn_200},
    }
    matchup = {"0": "ironhold", "1": "goldvein"}
    rows = [_row_with_clans("victory", 0, matchup, snapshots=snapshots)]
    stats = ar.build_per_clan_stats(rows)

    ironhold = stats["ironhold"]
    assert ironhold["median_unit_count_100"] == 5.0
    assert ironhold["median_gold_200"] == 500.0
    assert stats["goldvein"]["median_unit_count_200"] == 20.0
    # No turn-300 snapshot was supplied, so that median should be None.
    assert ironhold["median_unit_count_300"] is None
def test_render_per_clan_table_runs():
    """Rendering should print the header and both clans, and return the stats."""
    buffer = io.StringIO()
    matchup = {"0": "ironhold", "1": "goldvein"}
    rows = [_row_with_clans("victory", 1, matchup)]
    stats = ar.render_per_clan_table(rows, out=buffer)

    rendered = buffer.getvalue()
    for fragment in ("per-clan stats:", "ironhold", "goldvein"):
        assert fragment in rendered
    assert stats["goldvein"]["wins"] == 1
# ---------------------------------------------------------------------------
# checklist-report: personality_win_balance_verdict (CI-consumable)
# ---------------------------------------------------------------------------
def test_verdict_no_data_passes():
    """No results: verdict passes with no reasons and lists every known clan."""
    verdict = cr.personality_win_balance_verdict([])
    assert verdict["pass"] is True
    assert verdict["reasons"] == []
    assert verdict["sample_size"] == 0

    reported_clans = set(verdict["clans"].keys())
    known_clans = set(cr.KNOWN_CLAN_IDS)
    assert reported_clans == known_clans
def test_verdict_fails_on_win_rate_over_50():
    """A clan winning every game should fail with a reason citing 100.0%."""
    one_win = _result("victory", "ironhold")
    verdict = cr.personality_win_balance_verdict([one_win] * 3)
    assert verdict["pass"] is False

    matching = [
        reason
        for reason in verdict["reasons"]
        if "ironhold" in reason and "100.0%" in reason
    ]
    assert matching
def test_verdict_passes_balanced_with_clans():
    """Two clans splitting two games evenly are expected to pass."""
    # Both games list two AI clans via player_clans; ironhold and goldvein
    # each win one of the two games, i.e. 50% each.
    matchup = {"0": "ironhold", "1": "goldvein"}
    results = [_result_with_clans("victory", winner, matchup) for winner in (0, 1)]
    verdict = cr.personality_win_balance_verdict(results)

    assert verdict["pass"] is True
    ironhold = verdict["clans"]["ironhold"]
    assert ironhold["appearances"] == 2
    assert ironhold["wins"] == 1
def test_verdict_fails_on_zero_wins_with_many_apps():
    """A clan with five appearances and no wins should be reported with "0 wins"."""
    # deepforge plays five games and never wins, which should trip the
    # zero-wins clause.
    matchup = {"0": "ironhold", "1": "deepforge"}
    results = [_result_with_clans("victory", 0, matchup) for _ in range(5)]
    verdict = cr.personality_win_balance_verdict(results)
    assert verdict["pass"] is False

    # ironhold fails on the 100% clause; deepforge on the zero-wins clause.
    zero_win_reasons = [
        reason
        for reason in verdict["reasons"]
        if "deepforge" in reason and "0 wins" in reason
    ]
    assert zero_win_reasons
def test_verdict_tolerates_zero_wins_under_threshold():
    """Four winless appearances should not produce a "0 wins" reason."""
    # deepforge appears four times with zero wins, which is below the
    # threshold and so not a failure. ironhold wins all four and fails on
    # the >50% clause instead.
    single_game = _result_with_clans("victory", 0, {"0": "ironhold", "1": "deepforge"})
    results = [single_game] * 4
    verdict = cr.personality_win_balance_verdict(results)

    flagged = any(
        "deepforge" in reason and "0 wins" in reason
        for reason in verdict["reasons"]
    )
    assert not flagged
def test_verdict_missing_clans_reported():
    """Clans that never appear in any game are expected in missing_clans."""
    # Only ironhold and goldvein play, so the others must be listed as missing.
    matchup = {"0": "ironhold", "1": "goldvein"}
    verdict = cr.personality_win_balance_verdict(
        [_result_with_clans("victory", 0, matchup)]
    )
    missing = set(verdict["missing_clans"])

    for absent_clan in ("deepforge", "blackhammer", "runesmith"):
        assert absent_clan in missing
    assert "ironhold" not in missing
# ---------------------------------------------------------------------------
# Runner
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Minimal stand-alone runner: executes every module-level ``test_*``
    # callable in name order, prints one PASS/FAIL line per test plus a
    # summary, and exits non-zero if any test failed.
    import traceback

    tests = [
        v
        for k, v in sorted(globals().items())
        if k.startswith("test_") and callable(v)
    ]
    passed = failed = 0
    for t in tests:
        try:
            t()
        except Exception as exc:
            # A bare ``assert`` raises AssertionError with an empty message,
            # so also report the exception type and the source line that
            # raised it; otherwise the FAIL line carries no diagnostic.
            frame = traceback.extract_tb(exc.__traceback__)[-1]
            where = f"line {frame.lineno}: {frame.line or '?'}"
            print(f" FAIL {t.__name__}: {type(exc).__name__}: {exc} ({where})")
            failed += 1
        else:
            print(f" PASS {t.__name__}")
            passed += 1
    print(f"\n{passed} passed, {failed} failed")
    sys.exit(0 if failed == 0 else 1)