magicciv/tools/test_quality_metrics.py
Natalie 43989eed82 feat(@projects/@magic-civilization): add autoplay quality metrics validation
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-04-17 14:23:47 -07:00

235 lines
7.6 KiB
Python

#!/usr/bin/env python3
"""Tests for p0-25 quality metrics: tier_peak, peak_unit_tier, wonder_count.
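Covers:
- Schema validation round-trip: fabricated turn_stats lines with new fields pass
- Schema bounds: out-of-range tier_peak and negative wonder_count are rejected
- Backward compat: pre-p0-25 lines (fields absent) still validate
- Reporter: build_quality_metrics surfaces correct medians
- Reporter: QUALITY_METRIC_ABSENT sentinel filters out pre-p0-25 rows
- Reporter: print_quality_metrics renders the metric names, or a "no data" message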
"""
from __future__ import annotations
import importlib.util as _iu
import io
import json
from pathlib import Path
_TOOLS = Path(__file__).parent
def _load(name: str, stem: str):
    """Import ``<stem>.py`` from the tools directory under the module name *name*.

    The module is loaded from its file path, so the file name does not have
    to be a valid Python identifier (the tools loaded by this file use
    hyphenated names).

    Args:
        name: Module name to assign to the loaded module.
        stem: File name, without the ``.py`` suffix, inside ``_TOOLS``.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec or loader can be built for the path.
    """
    path = _TOOLS / f"{stem}.py"
    spec = _iu.spec_from_file_location(name, path)
    # Both the spec and its loader are Optional; fail with a clear message
    # instead of an AttributeError on None further down.
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot load module {name!r} from {path}")
    mod = _iu.module_from_spec(spec)
    spec.loader.exec_module(mod)
    return mod
# The tool scripts have hyphenated file names, which are not valid Python
# identifiers, so they are loaded by path and bound to short aliases here.
ar = _load(name="autoplay_report", stem="autoplay-report")
av = _load(name="autoplay_validate", stem="autoplay-validate")
# ---------------------------------------------------------------------------
# Minimal valid turn_stats_line builder
# ---------------------------------------------------------------------------
def _base_player_stats(**extra) -> dict:
base = {
"pop": 5, "pop_peak": 8, "mil": 3,
"cities": 2, "cities_captured": 0, "cities_lost": 0,
"gold": 100, "gold_peak": 200, "gold_per_turn": 5,
"techs": 10, "tiles": 30, "buildings": 8,
"luxuries": 2, "happiness": 5,
"food_total": 12.0, "production_total": 10.0,
"kills": 2, "units_lost": 1,
"turn_first_pop_3": 40, "turn_first_pop_4": 80,
}
base.update(extra)
return base
def _base_line(**player_extra) -> dict:
    """Build a complete turn_stats line with two players, "0" and "1".

    The line records a "victory" outcome with ``winner_index`` 0. Every
    keyword argument is forwarded to ``_base_player_stats`` for both players,
    so each player gets its own separate stats dict with the same extras.
    """
    aggregate = {
        "total_combats": 60,
        "total_cities_founded": 4,
        "total_cities_captured": 2,
        "turn_first_combat": 10,
        "turn_first_city_captured": 80,
        "lair_cleared": 3,
        "strategic_gate_rejected": 0,
    }
    # One independent stats dict per player, keyed by the player index as a string.
    player_stats = {
        str(idx): _base_player_stats(**player_extra) for idx in range(2)
    }
    return {
        "turn": 300,
        "outcome": "victory",
        "winner_index": 0,
        "winner_personality": "ironhold",
        "victory_type": "domination",
        "wall_clock_sec": 45.0,
        "aggregate": aggregate,
        "player_stats": player_stats,
        "invariant_violations": [],
    }
# ---------------------------------------------------------------------------
# Schema round-trip: new fields present
# ---------------------------------------------------------------------------
def test_schema_accepts_new_fields():
    """A line carrying all three quality fields validates with no errors."""
    turn_stats_schema = ar.load_schema(ar.TURN_STATS_SCHEMA_NAME)
    candidate = _base_line(tier_peak=7, peak_unit_tier=5, wonder_count=2)
    problems = av.validate(candidate, turn_stats_schema)
    assert problems == [], f"Schema rejected line with new fields: {problems}"
def test_schema_accepts_missing_new_fields_backward_compat():
    """A line without any of the quality fields still validates cleanly."""
    turn_stats_schema = ar.load_schema(ar.TURN_STATS_SCHEMA_NAME)
    # No tier_peak / peak_unit_tier / wonder_count keys are supplied here.
    legacy_line = _base_line()
    problems = av.validate(legacy_line, turn_stats_schema)
    assert problems == [], (
        f"Schema rejected old line without quality fields: {problems}"
    )
def test_schema_rejects_tier_peak_out_of_range():
    """A tier_peak of 11 must produce at least one validation error."""
    turn_stats_schema = ar.load_schema(ar.TURN_STATS_SCHEMA_NAME)
    # max is 10, so 11 sits just past the upper bound
    too_high = _base_line(tier_peak=11)
    problems = av.validate(too_high, turn_stats_schema)
    assert problems, "Schema should reject tier_peak=11"
def test_schema_rejects_negative_wonder_count():
    """A wonder_count of -1 must produce at least one validation error."""
    turn_stats_schema = ar.load_schema(ar.TURN_STATS_SCHEMA_NAME)
    # minimum is 0, so -1 sits just below the lower bound
    negative = _base_line(wonder_count=-1)
    problems = av.validate(negative, turn_stats_schema)
    assert problems, "Schema should reject wonder_count=-1"
# ---------------------------------------------------------------------------
# Reporter: build_quality_metrics
# ---------------------------------------------------------------------------
def _report_row(
    winner_idx: int,
    p0_tp: int | float,
    p1_tp: int | float,
    p0_put: int | float = ar.QUALITY_METRIC_ABSENT,
    p1_put: int | float = ar.QUALITY_METRIC_ABSENT,
    p0_wc: int | float = ar.QUALITY_METRIC_ABSENT,
    p1_wc: int | float = ar.QUALITY_METRIC_ABSENT,
) -> dict:
    """Build one flat report row for a two-player game.

    Args:
        winner_idx: Value stored under ``winner_index``.
        p0_tp, p1_tp: ``tier_peak`` values for player 0 and player 1.
        p0_put, p1_put: ``peak_unit_tier`` values; default to the
            ``QUALITY_METRIC_ABSENT`` sentinel.
        p0_wc, p1_wc: ``wonder_count`` values; default to the
            ``QUALITY_METRIC_ABSENT`` sentinel.

    Returns:
        A dict with ``winner_index`` plus one ``p0_*`` / ``p1_*`` pair per metric.
    """
    row = {"winner_index": winner_idx}
    row.update(p0_tier_peak=p0_tp, p1_tier_peak=p1_tp)
    row.update(p0_peak_unit_tier=p0_put, p1_peak_unit_tier=p1_put)
    row.update(p0_wonder_count=p0_wc, p1_wonder_count=p1_wc)
    return row
def test_quality_metrics_winner_tier_peak():
    """With player 0 as winner, the winner median equals p0's tier_peak."""
    metrics = ar.build_quality_metrics([_report_row(0, p0_tp=7, p1_tp=5)])
    assert metrics["median_winner_tier_peak"] == 7.0
def test_quality_metrics_loser_tier_peak():
    """With player 0 as winner, the loser median equals p1's tier_peak."""
    metrics = ar.build_quality_metrics([_report_row(0, p0_tp=7, p1_tp=5)])
    assert metrics["median_loser_tier_peak"] == 5.0
def test_quality_metrics_tier_peak_gap():
    """A single game with tier peaks 8 (winner) and 5 (loser) has gap 3."""
    metrics = ar.build_quality_metrics([_report_row(0, p0_tp=8, p1_tp=5)])
    assert metrics["median_tier_peak_gap"] == 3.0
def test_quality_metrics_gap_median_across_seeds():
    """The gap median is taken across games: gaps 3, 3 and 1 give 3."""
    first = _report_row(0, p0_tp=8, p1_tp=5)  # gap 3
    second = _report_row(1, p0_tp=4, p1_tp=7)  # gap 3 (winner is p1)
    third = _report_row(0, p0_tp=6, p1_tp=5)  # gap 1
    metrics = ar.build_quality_metrics([first, second, third])
    assert metrics["median_tier_peak_gap"] == 3.0
def test_quality_metrics_peak_unit_tier_median():
    """peak_unit_tier is pooled over both players of every game."""
    game_a = _report_row(0, p0_tp=6, p1_tp=5, p0_put=5, p1_put=4)
    game_b = _report_row(0, p0_tp=7, p1_tp=6, p0_put=6, p1_put=5)
    metrics = ar.build_quality_metrics([game_a, game_b])
    # All four values: 5, 4, 6, 5 → median 5.0
    assert metrics["median_peak_unit_tier"] == 5.0
def test_quality_metrics_wonder_count_median():
    """wonder_count is pooled over both players of every game."""
    game_a = _report_row(0, p0_tp=6, p1_tp=5, p0_wc=2, p1_wc=1)
    game_b = _report_row(0, p0_tp=7, p1_tp=6, p0_wc=3, p1_wc=2)
    metrics = ar.build_quality_metrics([game_a, game_b])
    # Values: 2, 1, 3, 2 → median 2.0
    assert metrics["median_wonder_count_per_player"] == 2.0
def test_quality_metrics_absent_sentinel_filtered():
    """Rows holding only the absent sentinel yield None for tier_peak metrics."""
    absent = ar.QUALITY_METRIC_ABSENT
    metrics = ar.build_quality_metrics(
        [_report_row(0, p0_tp=absent, p1_tp=absent)]
    )
    for key in (
        "median_winner_tier_peak",
        "median_loser_tier_peak",
        "median_tier_peak_gap",
    ):
        assert metrics[key] is None
def test_quality_metrics_all_none_when_no_data():
    """With no rows at all, every reported metric value is None."""
    metrics = ar.build_quality_metrics([])
    # Collect anything that is unexpectedly populated so a failure names it.
    populated = {key: val for key, val in metrics.items() if val is not None}
    assert not populated
# ---------------------------------------------------------------------------
# Reporter: print_quality_metrics
# ---------------------------------------------------------------------------
def test_print_quality_metrics_renders_values():
    """The printed report contains its heading and the expected metric names."""
    buffer = io.StringIO()
    ar.print_quality_metrics(
        [_report_row(0, p0_tp=7, p1_tp=5, p0_put=6, p1_put=5, p0_wc=2, p1_wc=1)],
        out=buffer,
    )
    rendered = buffer.getvalue()
    for expected in (
        "state-at-end quality metrics",
        "median_winner_tier_peak",
        "median_tier_peak_gap",
    ):
        assert expected in rendered
def test_print_quality_metrics_no_data_message():
    """Sentinel-only rows make the printed report say "no data"."""
    absent = ar.QUALITY_METRIC_ABSENT
    buffer = io.StringIO()
    ar.print_quality_metrics(
        [_report_row(0, p0_tp=absent, p1_tp=absent)],
        out=buffer,
    )
    assert "no data" in buffer.getvalue()
# ---------------------------------------------------------------------------
# Runner (also works as pytest)
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import sys

    # Discover module-level test functions in name order so runs are
    # repeatable; skip any non-callable global that happens to be named test_*.
    tests = [
        obj
        for name, obj in sorted(globals().items())
        if name.startswith("test_") and callable(obj)
    ]
    passed = failed = 0
    for func in tests:
        try:
            func()
        except Exception as exc:  # runner boundary: report and keep going
            # Show the exception type too: a bare ``assert`` has an empty
            # message, which would otherwise leave the FAIL line blank.
            print(f" FAIL {func.__name__}: {type(exc).__name__}: {exc}")
            failed += 1
        else:
            print(f" PASS {func.__name__}")
            passed += 1
    print(f"\n{passed} passed, {failed} failed")
    # A non-zero exit status signals failure to the calling shell or CI job.
    sys.exit(1 if failed else 0)