236 lines
7.6 KiB
Python
236 lines
7.6 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
"""Tests for p0-25 quality metrics: tier_peak, peak_unit_tier, wonder_count.

Covers:
- Schema validation round-trip: fabricated turn_stats lines with new fields pass
- Backward compat: pre-p0-25 lines (fields absent) still validate
- Reporter: build_quality_metrics surfaces correct medians
- Reporter: QUALITY_METRIC_ABSENT sentinel filters out pre-p0-25 rows
"""
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import importlib.util as _iu
|
||
|
|
import io
|
||
|
|
import json
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
# Directory that contains this test file; `_load` resolves the scripts under
# test relative to it.
_TOOLS = Path(__file__).parent


def _load(name: str, stem: str):
    """Load ``<stem>.py`` from ``_TOOLS`` and return it as a module.

    Args:
        name: Module name given to the import spec (becomes ``__name__`` of
            the loaded module).
        stem: File name without the ``.py`` suffix, looked up in ``_TOOLS``.

    Returns:
        The executed module object. It is not inserted into ``sys.modules``.

    Raises:
        ImportError: If no import spec or loader can be built for the path.
        OSError: Propagated from the loader when the file cannot be read.
    """
    path = _TOOLS / f"{stem}.py"
    spec = _iu.spec_from_file_location(name, path)
    # spec_from_file_location may return None (and a spec may lack a loader);
    # fail with a clear message instead of an AttributeError further down.
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot build an import spec for {path}")
    mod = _iu.module_from_spec(spec)
    spec.loader.exec_module(mod)
    return mod
|
||
|
|
|
||
|
|
|
||
|
|
# The scripts under test have hyphenated file names, which a plain ``import``
# statement cannot reference, so they are loaded by path under short aliases.
ar = _load(name="autoplay_report", stem="autoplay-report")
av = _load(name="autoplay_validate", stem="autoplay-validate")
|
||
|
|
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------------
|
||
|
|
# Minimal valid turn_stats_line builder
|
||
|
|
# ---------------------------------------------------------------------------
|
||
|
|
|
||
|
|
def _base_player_stats(**extra) -> dict:
    """Return a fresh per-player stats dict, with ``extra`` layered on top.

    Keyword arguments override a default of the same name or are appended
    as additional keys after the defaults.
    """
    defaults = dict(
        pop=5,
        pop_peak=8,
        mil=3,
        cities=2,
        cities_captured=0,
        cities_lost=0,
        gold=100,
        gold_peak=200,
        gold_per_turn=5,
        techs=10,
        tiles=30,
        buildings=8,
        luxuries=2,
        happiness=5,
        food_total=12.0,
        production_total=10.0,
        kills=2,
        units_lost=1,
        turn_first_pop_3=40,
        turn_first_pop_4=80,
    )
    return {**defaults, **extra}
|
||
|
|
|
||
|
|
|
||
|
|
def _base_line(**player_extra) -> dict:
    """Return a fabricated turn_stats line with two players, "0" and "1".

    ``player_extra`` is forwarded to ``_base_player_stats`` for both players,
    so each player gets its own dict with the same extra fields.
    """
    aggregate = {
        "total_combats": 60,
        "total_cities_founded": 4,
        "total_cities_captured": 2,
        "turn_first_combat": 10,
        "turn_first_city_captured": 80,
        "lair_cleared": 3,
        "strategic_gate_rejected": 0,
    }
    # One call per player so the two entries never share a dict object.
    players = {
        player_key: _base_player_stats(**player_extra)
        for player_key in ("0", "1")
    }
    return {
        "turn": 300,
        "outcome": "victory",
        "winner_index": 0,
        "winner_personality": "ironhold",
        "victory_type": "domination",
        "wall_clock_sec": 45.0,
        "aggregate": aggregate,
        "player_stats": players,
        "invariant_violations": [],
    }
|
||
|
|
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------------
|
||
|
|
# Schema round-trip: new fields present
|
||
|
|
# ---------------------------------------------------------------------------
|
||
|
|
|
||
|
|
def test_schema_accepts_new_fields():
    """A line carrying all three quality fields must validate without errors."""
    turn_stats_schema = ar.load_schema(ar.TURN_STATS_SCHEMA_NAME)
    errors = av.validate(
        _base_line(tier_peak=7, peak_unit_tier=5, wonder_count=2),
        turn_stats_schema,
    )
    assert errors == [], f"Schema rejected line with new fields: {errors}"
|
||
|
|
|
||
|
|
|
||
|
|
def test_schema_accepts_missing_new_fields_backward_compat():
    """A line without the quality fields must still validate without errors."""
    turn_stats_schema = ar.load_schema(ar.TURN_STATS_SCHEMA_NAME)
    # No tier_peak / peak_unit_tier / wonder_count on either player.
    legacy_line = _base_line()
    errors = av.validate(legacy_line, turn_stats_schema)
    assert errors == [], f"Schema rejected old line without quality fields: {errors}"
|
||
|
|
|
||
|
|
|
||
|
|
def test_schema_rejects_tier_peak_out_of_range():
    """tier_peak=11 is expected to produce at least one validation error."""
    turn_stats_schema = ar.load_schema(ar.TURN_STATS_SCHEMA_NAME)
    # max is 10
    too_high = _base_line(tier_peak=11)
    errors = av.validate(too_high, turn_stats_schema)
    assert errors, "Schema should reject tier_peak=11"
|
||
|
|
|
||
|
|
|
||
|
|
def test_schema_rejects_negative_wonder_count():
    """wonder_count=-1 is expected to produce at least one validation error."""
    turn_stats_schema = ar.load_schema(ar.TURN_STATS_SCHEMA_NAME)
    # minimum is 0
    negative = _base_line(wonder_count=-1)
    errors = av.validate(negative, turn_stats_schema)
    assert errors, "Schema should reject wonder_count=-1"
|
||
|
|
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------------
|
||
|
|
# Reporter: build_quality_metrics
|
||
|
|
# ---------------------------------------------------------------------------
|
||
|
|
|
||
|
|
def _report_row(
    winner_idx: int,
    p0_tp: int | float,
    p1_tp: int | float,
    p0_put: int | float = ar.QUALITY_METRIC_ABSENT,
    p1_put: int | float = ar.QUALITY_METRIC_ABSENT,
    p0_wc: int | float = ar.QUALITY_METRIC_ABSENT,
    p1_wc: int | float = ar.QUALITY_METRIC_ABSENT,
) -> dict:
    """Build one reporter row for a two-player game.

    The short parameter names map onto the row keys: ``tp`` is tier_peak,
    ``put`` is peak_unit_tier and ``wc`` is wonder_count, each prefixed with
    the player (``p0`` / ``p1``). The optional fields default to
    ``ar.QUALITY_METRIC_ABSENT``.
    """
    return dict(
        winner_index=winner_idx,
        p0_tier_peak=p0_tp,
        p1_tier_peak=p1_tp,
        p0_peak_unit_tier=p0_put,
        p1_peak_unit_tier=p1_put,
        p0_wonder_count=p0_wc,
        p1_wonder_count=p1_wc,
    )
|
||
|
|
|
||
|
|
|
||
|
|
def test_quality_metrics_winner_tier_peak():
    """Single game won by player 0: winner median should be p0's tier_peak."""
    single_game = _report_row(0, p0_tp=7, p1_tp=5)
    metrics = ar.build_quality_metrics([single_game])
    assert metrics["median_winner_tier_peak"] == 7.0
|
||
|
|
|
||
|
|
|
||
|
|
def test_quality_metrics_loser_tier_peak():
    """Single game won by player 0: loser median should be p1's tier_peak."""
    single_game = _report_row(0, p0_tp=7, p1_tp=5)
    metrics = ar.build_quality_metrics([single_game])
    assert metrics["median_loser_tier_peak"] == 5.0
|
||
|
|
|
||
|
|
|
||
|
|
def test_quality_metrics_tier_peak_gap():
    """Single game with tier peaks 8 (winner) and 5 (loser): expected gap 3."""
    single_game = _report_row(0, p0_tp=8, p1_tp=5)
    metrics = ar.build_quality_metrics([single_game])
    assert metrics["median_tier_peak_gap"] == 3.0
|
||
|
|
|
||
|
|
|
||
|
|
def test_quality_metrics_gap_median_across_seeds():
    """Three games with gaps 3, 3 and 1 are expected to give a median gap of 3."""
    # (winner_index, p0 tier_peak, p1 tier_peak)
    games = (
        (0, 8, 5),  # gap 3
        (1, 4, 7),  # gap 3 (winner is p1)
        (0, 6, 5),  # gap 1
    )
    rows = [
        _report_row(winner, p0_tp=p0_peak, p1_tp=p1_peak)
        for winner, p0_peak, p1_peak in games
    ]
    metrics = ar.build_quality_metrics(rows)
    assert metrics["median_tier_peak_gap"] == 3.0
|
||
|
|
|
||
|
|
|
||
|
|
def test_quality_metrics_peak_unit_tier_median():
    """Expect the peak_unit_tier median to be taken over all four player values."""
    first_game = _report_row(0, p0_tp=6, p1_tp=5, p0_put=5, p1_put=4)
    second_game = _report_row(0, p0_tp=7, p1_tp=6, p0_put=6, p1_put=5)
    metrics = ar.build_quality_metrics([first_game, second_game])
    # All four values: 5, 4, 6, 5 → median 5.0
    assert metrics["median_peak_unit_tier"] == 5.0
|
||
|
|
|
||
|
|
|
||
|
|
def test_quality_metrics_wonder_count_median():
    """Expect the wonder_count median to be taken over all four player values."""
    first_game = _report_row(0, p0_tp=6, p1_tp=5, p0_wc=2, p1_wc=1)
    second_game = _report_row(0, p0_tp=7, p1_tp=6, p0_wc=3, p1_wc=2)
    metrics = ar.build_quality_metrics([first_game, second_game])
    # Values: 2, 1, 3, 2 → median 2.0
    assert metrics["median_wonder_count_per_player"] == 2.0
|
||
|
|
|
||
|
|
|
||
|
|
def test_quality_metrics_absent_sentinel_filtered():
    """Rows holding only the absent sentinel should yield None tier-peak medians."""
    absent = ar.QUALITY_METRIC_ABSENT
    metrics = ar.build_quality_metrics([_report_row(0, p0_tp=absent, p1_tp=absent)])
    for metric_name in (
        "median_winner_tier_peak",
        "median_loser_tier_peak",
        "median_tier_peak_gap",
    ):
        assert metrics[metric_name] is None
|
||
|
|
|
||
|
|
|
||
|
|
def test_quality_metrics_all_none_when_no_data():
    """Every metric reported for an empty row list must be None.

    Each assertion names the offending metric so that a failure is
    diagnosable even when only the exception message is shown.
    """
    q = ar.build_quality_metrics([])
    # NOTE(review): if build_quality_metrics returned an empty dict this loop
    # would pass vacuously; confirm the reporter always emits its metric keys.
    for metric_name, value in q.items():
        assert value is None, (
            f"{metric_name} should be None with no data, got {value!r}"
        )
|
||
|
|
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------------
|
||
|
|
# Reporter: print_quality_metrics
|
||
|
|
# ---------------------------------------------------------------------------
|
||
|
|
|
||
|
|
def test_print_quality_metrics_renders_values():
    """The printed report should contain the section title and metric names."""
    game = _report_row(0, p0_tp=7, p1_tp=5, p0_put=6, p1_put=5, p0_wc=2, p1_wc=1)
    buffer = io.StringIO()
    ar.print_quality_metrics([game], out=buffer)
    rendered = buffer.getvalue()
    for expected in (
        "state-at-end quality metrics",
        "median_winner_tier_peak",
        "median_tier_peak_gap",
    ):
        assert expected in rendered
|
||
|
|
|
||
|
|
|
||
|
|
def test_print_quality_metrics_no_data_message():
    """With only sentinel tier peaks, the printed report should say "no data"."""
    absent = ar.QUALITY_METRIC_ABSENT
    buffer = io.StringIO()
    ar.print_quality_metrics(
        [_report_row(0, p0_tp=absent, p1_tp=absent)],
        out=buffer,
    )
    assert "no data" in buffer.getvalue()
|
||
|
|
|
||
|
|
|
||
|
|
# ---------------------------------------------------------------------------
|
||
|
|
# Runner (also works as pytest)
|
||
|
|
# ---------------------------------------------------------------------------
|
||
|
|
|
||
|
|
if __name__ == "__main__":
    # Minimal stand-alone runner so the file can be executed without pytest.
    import sys
    import traceback

    # Collect module-level callables named test_*, in name order; the
    # callable() check skips any non-function global that shares the prefix.
    tests = [
        obj
        for name, obj in sorted(globals().items())
        if name.startswith("test_") and callable(obj)
    ]
    passed = failed = 0
    for t in tests:
        try:
            t()
        except Exception as exc:  # runner boundary: report and keep going
            print(f" FAIL {t.__name__}: {exc}")
            # Bare asserts carry no message, so the summary line alone can be
            # empty; the traceback (on stderr) shows which assertion failed.
            traceback.print_exc()
            failed += 1
        else:
            print(f" PASS {t.__name__}")
            passed += 1
    print(f"\n{passed} passed, {failed} failed")
    # Non-zero exit status when any test failed.
    sys.exit(0 if failed == 0 else 1)
|