diff --git a/tooling/rl_self_play/magic_civ_env.py b/tooling/rl_self_play/magic_civ_env.py
index b50478f1..d90c04cf 100644
--- a/tooling/rl_self_play/magic_civ_env.py
+++ b/tooling/rl_self_play/magic_civ_env.py
@@ -82,7 +82,13 @@ OPPONENT_ELIMINATED = 0.50
 # the dense intra-turn gradient. The slow-game ramp adds linearly-
 # growing per-step pressure after SLOW_PENALTY_START turns, reaching
 # SLOW_PENALTY_PEAK per step at turn SLOW_PENALTY_START + SLOW_PENALTY_SPAN.
-SCORE_DELTA_SCALE = 1e-3
+#
+# NOTE: score_estimate is now the UNIFIED raw score (mc-score ScoreController,
+# unbounded), ~10-20x larger in magnitude than the old clamped [0, 1000] scale,
+# so SCORE_DELTA_SCALE was lowered from 1e-3 to 1e-4 to keep the per-turn score
+# reward in the same range as the other terms. Retune empirically once the
+# self-play stable resumes training on the unified objective.
+SCORE_DELTA_SCALE = 1e-4
 STEP_PENALTY_BASE = 5e-4
 SLOW_PENALTY_PEAK = 1e-3
 SLOW_PENALTY_START = 500