diff --git a/public/games/age-of-dwarves/data/ai/reward_overlays.json b/public/games/age-of-dwarves/data/ai/reward_overlays.json
new file mode 100644
index 00000000..f36d0820
--- /dev/null
+++ b/public/games/age-of-dwarves/data/ai/reward_overlays.json
@@ -0,0 +1,68 @@
+{
+  "$comment": "Per-clan reward-shaping overlays for clan-conditioned RL (mc-ai-trained-not-scripted). Multipliers on the EVENT-reward groups only \u2014 terminal win/loss/decisive bonus stay universal so every clan equally wants to win, differing only in intermediate incentives. Derived from ai_personalities.json strategic_axes (axis/5 = neutral at 5), normalized per clan to mean 1.0 so total shaping magnitude is comparable across clans (no difficulty/fairness confound). Generated, do not hand-edit; regenerate from the axes.",
+  "groups": {
+    "combat": [
+      "capital_captured_by_me",
+      "city_captured_by_me",
+      "enemy_unit_killed_by_me",
+      "opponent_eliminated"
+    ],
+    "expansion": [
+      "city_founded_by_me"
+    ],
+    "production": [
+      "wonder_built_by_me"
+    ],
+    "economy": [
+      "score_delta"
+    ],
+    "tech": [
+      "tech_researched_by_me",
+      "culture_researched_by_me"
+    ]
+  },
+  "overlays": {
+    "ironhold": {
+      "combat": 1.0909,
+      "expansion": 0.7273,
+      "production": 1.6364,
+      "economy": 0.5455,
+      "tech": 1.0
+    },
+    "goldvein": {
+      "combat": 0.5455,
+      "expansion": 0.9091,
+      "production": 0.9091,
+      "economy": 1.6364,
+      "tech": 1.0
+    },
+    "blackhammer": {
+      "combat": 1.5,
+      "expansion": 1.0,
+      "production": 1.1667,
+      "economy": 0.3333,
+      "tech": 1.0
+    },
+    "deepforge": {
+      "combat": 0.8421,
+      "expansion": 0.4211,
+      "production": 1.6842,
+      "economy": 1.0526,
+      "tech": 1.0
+    },
+    "tinkersmith": {
+      "combat": 0.8421,
+      "expansion": 1.0526,
+      "production": 1.2632,
+      "economy": 0.8421,
+      "tech": 1.0
+    },
+    "runesmith": {
+      "combat": 0.9091,
+      "expansion": 1.0909,
+      "production": 0.9091,
+      "economy": 1.0909,
+      "tech": 1.0
+    }
+  }
+}
\ No newline at end of file
diff --git a/src/game/engine/scenes/headless/player_api_main.gd b/src/game/engine/scenes/headless/player_api_main.gd
index d76a5a21..d0f45dbe 100644
--- a/src/game/engine/scenes/headless/player_api_main.gd
+++ b/src/game/engine/scenes/headless/player_api_main.gd
@@ -696,6 +696,29 @@ func _apply_ai_assignments(gs: RefCounted, num_players: int) -> void:
 		else:
 			_emit_event("ai_controller_assigned", {"slot": slot, "controller_id": controller_id})
 
+	# Clan-condition the LEARNER slot (the Python-controlled slot). Stamp its
+	# clan id so `PlayerState.clan_id` projects into `PlayerView.clan_index`,
+	# which the learned-controller observation one-hots — this is how RL
+	# training conditions the policy on the clan it is playing. The scoring
+	# weights set alongside are inert (the learner's actions come from stdin,
+	# not the scripted AI). Unset = no-op, learner stays the generalist
+	# (clan_index = -1); an unknown id or failed stamp emits a protocol error.
+	var learner_clan: String = _env_or("CP_LEARNER_CLAN", "").strip_edges()
+	if not learner_clan.is_empty():
+		if clan_ids.has(learner_clan):
+			var ok_learner: bool = bool(
+				gs.set_player_personality_json(_player_slot, learner_clan, json_text)
+			)
+			if ok_learner:
+				_emit_event("learner_clan_assigned", {"slot": _player_slot, "clan_id": learner_clan})
+			else:
+				_emit_protocol_error(
+					"set_player_personality_json failed for learner slot=%d clan=%s"
+					% [_player_slot, learner_clan]
+				)
+		else:
+			_emit_protocol_error("CP_LEARNER_CLAN=%s not in ai_personalities.json" % learner_clan)
+
 
 func _scan_land_tiles(grid: RefCounted, w: int, h: int) -> Array[Vector2i]:
 	## Walk the grid and collect every land hex. Mirrors
diff --git a/tooling/rl_self_play/harness_client.py b/tooling/rl_self_play/harness_client.py
index 76bb7d9b..a9007ed0 100644
--- a/tooling/rl_self_play/harness_client.py
+++ b/tooling/rl_self_play/harness_client.py
@@ -69,6 +69,12 @@ class HarnessConfig:
     # Set this to mix learned + scripted opponents in one game, e.g.
     # `("learned:duel-v1b", "", "")` puts learned on the first AI slot.
     player_controllers: tuple[str, ...] = ()
+    # Clan-conditioned RL: stamp the LEARNER slot's clan id (an
+    # ai_personalities.json key, e.g. "blackhammer") so PlayerState.clan_id
+    # projects into PlayerView.clan_index and the observation one-hots it.
+    # Empty = generalist (clan_index = -1). See player_api_main.gd
+    # CP_LEARNER_CLAN.
+    learner_clan: str = ""
 
     @property
     def effective_player_slots(self) -> tuple[int, ...]:
@@ -90,6 +96,8 @@ class HarnessConfig:
         }
         if self.player_controllers:
             env["CP_PLAYER_CONTROLLERS"] = ",".join(self.player_controllers)
+        if self.learner_clan:
+            env["CP_LEARNER_CLAN"] = self.learner_clan
         return env
 
 
diff --git a/tooling/rl_self_play/magic_civ_env.py b/tooling/rl_self_play/magic_civ_env.py
index 2ed03f0b..b50478f1 100644
--- a/tooling/rl_self_play/magic_civ_env.py
+++ b/tooling/rl_self_play/magic_civ_env.py
@@ -15,6 +15,9 @@ its win rate against this baseline; the policy is considered to have
 """
 from __future__ import annotations
 
+import json
+import os
+import random
 import sys
 import time
 from dataclasses import replace
@@ -107,6 +110,20 @@ def _step_penalty(turn: int) -> float:
 DEFAULT_MAX_STEPS_PER_EPISODE = 250_000
 DEFAULT_MAX_TURNS = 1000
 
+_REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
+_OVERLAYS_REL = "public/games/age-of-dwarves/data/ai/reward_overlays.json"
+
+
+def _load_reward_overlays() -> dict[str, dict[str, float]]:
+    """Per-clan reward-shaping overlays (clan -> {group -> multiplier}). Missing,
+    undecodable or mis-shaped file = no overlays (clans train on the neutral catalog)."""
+    path = os.environ.get("MC_REWARD_OVERLAYS") or os.path.join(_REPO_ROOT, _OVERLAYS_REL)
+    try:
+        with open(path, encoding="utf-8") as fh:
+            return dict(json.load(fh).get("overlays") or {})
+    except (OSError, ValueError, TypeError, AttributeError):  # bad JSON/encoding/shape
+        return {}
+
 
 class MagicCivEnv(gym.Env[np.ndarray, np.int64]):
     """Single-learner Gym wrapper: our policy controls slot 0.
@@ -130,6 +147,7 @@ class MagicCivEnv(gym.Env[np.ndarray, np.int64]):
         max_turns: int = DEFAULT_MAX_TURNS,
         max_steps_per_episode: int = DEFAULT_MAX_STEPS_PER_EPISODE,
         opponent: ModelOpponent | None = None,
+        clan_list: tuple[str, ...] = (),
     ) -> None:
         super().__init__()
         self._config = harness_config or HarnessConfig()
@@ -168,6 +186,22 @@ class MagicCivEnv(gym.Env[np.ndarray, np.int64]):
         # opponent elimination (the old duel-only 1v1 shortcut). The
         # authoritative `game_over` event still takes priority when present.
         self._live_players: set[int] = set()
+        # Clan-conditioned RL. Each episode the env samples a clan from
+        # `clan_list`, stamps it on the learner slot (CP_LEARNER_CLAN → the obs
+        # clan one-hot) and scales the SHAPING rewards by that clan's overlay
+        # (group -> multiplier). Terminal win/loss/decisive stay universal so
+        # every clan equally wants to win. Empty list = generalist (no clan,
+        # neutral catalog). Seeded RNG → reproducible clan sequence per run.
+        self._clan_list: tuple[str, ...] = tuple(clan_list)
+        self._overlays: dict[str, dict[str, float]] = _load_reward_overlays()
+        self._clan_rng = random.Random(self._config.seed)
+        self._cur_clan: str = ""
+        self._cur_overlay: dict[str, float] = {}
+
+    def _ov(self, group: str) -> float:
+        """Multiplier the active clan overlay assigns to `group`; 1.0 when the
+        overlay is empty (generalist) or does not list that group."""
+        return self._cur_overlay[group] if group in self._cur_overlay else 1.0
 
     # ── Gymnasium API ────────────────────────────────────────────────
 
@@ -190,6 +224,15 @@ class MagicCivEnv(gym.Env[np.ndarray, np.int64]):
         # dropped them, which would have un-declared the external slots.
         if seed is not None:
             cfg = replace(cfg, seed=seed)
+        # Clan-conditioned RL: sample this episode's clan, stamp it on the
+        # learner slot (CP_LEARNER_CLAN), and select its reward overlay.
+        if self._clan_list:
+            self._cur_clan = self._clan_rng.choice(self._clan_list)
+            cfg = replace(cfg, learner_clan=self._cur_clan)
+            self._cur_overlay = self._overlays.get(self._cur_clan, {})
+        else:
+            self._cur_clan = ""
+            self._cur_overlay = {}
         self._terminated = False
         self._step_count = 0
         self._capital_by_player = {}
@@ -309,7 +352,7 @@ class MagicCivEnv(gym.Env[np.ndarray, np.int64]):
         prev_score = self._last_score
         new_score = float(view.get("score", {}).get("score_estimate", 0.0))
         # Symmetric score-delta — gains and losses both count.
-        reward += SCORE_DELTA_SCALE * (new_score - prev_score)
+        reward += SCORE_DELTA_SCALE * (new_score - prev_score) * self._ov("economy")
         # Event-driven shaping (Phase 1 catalog).
         reward += self._apply_event_rewards(recent_events, me)
 
@@ -454,7 +497,7 @@ class MagicCivEnv(gym.Env[np.ndarray, np.int64]):
                     self._capital_by_player[owner] = cid
                 if owner == me:
                     if self._city_founded_rewards_issued < MAX_CITY_FOUNDED_REWARDS:
-                        total += CITY_FOUNDED_BY_ME
+                        total += CITY_FOUNDED_BY_ME * self._ov("expansion")
                         self._city_founded_rewards_issued += 1
             elif kind == "city_captured":
                 old_owner = int(ev.get("old_owner", -1))
@@ -465,14 +508,16 @@ class MagicCivEnv(gym.Env[np.ndarray, np.int64]):
                     and self._capital_by_player.get(old_owner) == cid
                 )
                 if new_owner == me:
-                    total += CAPITAL_CAPTURED_BY_ME if is_capital else CITY_CAPTURED_BY_ME
+                    total += (
+                        CAPITAL_CAPTURED_BY_ME if is_capital else CITY_CAPTURED_BY_ME
+                    ) * self._ov("combat")
                 elif old_owner == me:
                     total += CAPITAL_LOST_BY_ME if is_capital else CITY_LOST_BY_ME
                 # When a capital changes hands, the *capturer's* first
                 # city is still their own capital — don't reassign.
             elif kind == "wonder_built":
                 if int(ev.get("player", -1)) == me:
-                    total += WONDER_BUILT_BY_ME
+                    total += WONDER_BUILT_BY_ME * self._ov("production")
             elif kind == "combat_resolved":
                 # Attribution: the wire event carries unit ids, not owners.
                 # We synthesise from defender_killed/attacker_killed plus
@@ -494,17 +539,17 @@ class MagicCivEnv(gym.Env[np.ndarray, np.int64]):
                     # asymmetric ±0.04/+0.05 is net-positive on even trades).
                     killer = ev.get("killer_unit_id")
                     if killer is None or self._unit_owner_lookup(str(killer)) == me:
-                        total += ENEMY_UNIT_KILLED_BY_ME
+                        total += ENEMY_UNIT_KILLED_BY_ME * self._ov("combat")
             elif kind == "tech_researched":
                 if int(ev.get("player", -1)) == me:
-                    total += TECH_RESEARCHED_BY_ME
+                    total += TECH_RESEARCHED_BY_ME * self._ov("tech")
             elif kind == "culture_researched":
                 if int(ev.get("player", -1)) == me:
-                    total += CULTURE_RESEARCHED_BY_ME
+                    total += CULTURE_RESEARCHED_BY_ME * self._ov("tech")
             elif kind == "player_eliminated":
                 p = int(ev.get("player", -1))
                 if p != me and p >= 0:
-                    total += OPPONENT_ELIMINATED
+                    total += OPPONENT_ELIMINATED * self._ov("combat")
         return total
 
     def _unit_owner_lookup(self, unit_id: str) -> int:
diff --git a/tooling/rl_self_play/train.py b/tooling/rl_self_play/train.py
index 653f55f4..6c29bcfe 100644
--- a/tooling/rl_self_play/train.py
+++ b/tooling/rl_self_play/train.py
@@ -66,6 +66,13 @@ def _build_argparser() -> argparse.ArgumentParser:
                    help="Stop training once eval win-rate exceeds this (default: 0.55).")
     p.add_argument("--run-name", default="duel-v1",
                    help="Subdirectory under runs/ + models/ (default: duel-v1).")
+    p.add_argument("--clan", default="",
+                   help=("Clan-conditioned training. '' = generalist (no clan, "
+                         "clan_index=-1). 'all' = sample every clan per episode "
+                         "(one conditioned policy for all clans). Or a comma list "
+                         "of ai_personalities.json ids, e.g. 'blackhammer'. The "
+                         "sampled clan is stamped on the learner (obs one-hot) and "
+                         "selects its reward overlay."))
     p.add_argument("--seed", type=int, default=42,
                    help="Base RNG seed; per-env seeds offset from this (default: 42).")
     p.add_argument("--init-from", default=None,
@@ -111,6 +118,18 @@ def _make_env_factory(args: argparse.Namespace, env_idx: int):
         int(s) for s in str(args.opponent_slots).split(",") if s.strip()
     )
 
+    # Resolve the clan list for clan-conditioned training. '' = generalist;
+    # 'all' = every clan (from the obs schema's canonical clan_order); else a
+    # comma list of ai_personalities.json ids.
+    clan_arg = str(getattr(args, "clan", "") or "").strip()
+    if not clan_arg:
+        clan_list: tuple[str, ...] = ()
+    elif clan_arg == "all":
+        from tooling.rl_self_play.obs_contract import load_schema  # type: ignore[import-not-found]
+        clan_list = tuple(load_schema()["clan_order"])
+    else:
+        clan_list = tuple(c.strip() for c in clan_arg.split(",") if c.strip())
+
     def _make() -> MagicCivEnv:
         cfg = HarnessConfig(
             seed=args.seed + env_idx,
@@ -128,7 +147,10 @@ def _make_env_factory(args: argparse.Namespace, env_idx: int):
                 deterministic=args.opponent_deterministic,
             )
         return MagicCivEnv(
-            harness_config=cfg, max_turns=args.max_turns, opponent=opponent
+            harness_config=cfg,
+            max_turns=args.max_turns,
+            opponent=opponent,
+            clan_list=clan_list,
         )
 
     return _make