fix(sprite-generation): 🐛 POI gates for landmarks/lairs + Grok backend hardening

- ranker: dedicated POI_GATES/POI_QUALITY for landmarks & lairs — they were routed through BUILDING_GATES (roof_visible/single_building/no_front_facade), so volcanoes, ley-confluence formations and lair camps failed every gate and burned generations to the regen cap.
- grok_generator: _ensure_grok_sdk re-checks the import per candidate path and raises a clear 'set GROK_BUILD_SDK_PATH' error instead of a cryptic ImportError.
- grok_generator: submit_batch generates a sprite's variants concurrently via asyncio.gather so the client's max_concurrent semaphore is actually used.
- add test_grok_pipeline.py — 49 headless checks (factory, POI-gate routing regression, prompt adaptation, PNG validation, starter manifest).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-23 18:48:23 -04:00 · 2026-06-23 18:48:23 -04:00 · 33edc39863
commit 33edc39863
parent b13508ed04
3 changed files with 183 additions and 8 deletions
--- a/tools/sprite-generation/engine/grok_generator.py
+++ b/tools/sprite-generation/engine/grok_generator.py
@ -40,21 +40,33 @@ _NEGATIVE_PRIORITY = (


 def _ensure_grok_sdk() -> None:
-    """Add grok-build-batch-sdk to path if not installed as a package."""
+    """Make grok-build-batch-sdk importable, or fail with a clear message."""
    try:
        import grok_build_batch_sdk  # noqa: F401
        return
    except ImportError:
        pass

+    env_override = os.environ.get("GROK_BUILD_SDK_PATH")
    candidates = [
+        Path(env_override) if env_override else None,
        Path(__file__).resolve().parents[5] / "@applications/@ml/@packages/@py/grok-build-batch-sdk/src",
-        Path(os.environ["GROK_BUILD_SDK_PATH"]) if os.environ.get("GROK_BUILD_SDK_PATH") else None,
    ]
    for sdk_path in candidates:
-        if sdk_path and sdk_path.exists() and str(sdk_path) not in sys.path:
-            sys.path.insert(0, str(sdk_path))
-            return
+        if sdk_path and sdk_path.exists():
+            if str(sdk_path) not in sys.path:
+                sys.path.insert(0, str(sdk_path))
+            try:
+                import grok_build_batch_sdk  # noqa: F401
+                return
+            except ImportError:
+                continue
+
+    searched = [str(c) for c in candidates if c]
+    raise RuntimeError(
+        "grok-build-batch-sdk not found. Install it as a package or set "
+        f"GROK_BUILD_SDK_PATH to its src/ directory. Searched: {searched}"
+    )


 def _adapt_prompt_for_grok(prompt: str, negative: str, *, max_negative_terms: int = 12) -> str:
@ -374,8 +386,10 @@ class GrokSpriteGenerator:
            else:
                guidance = self.defaults.get("guidance_scale", 7.5)

-            for i in range(variants_per):
-                result = await self.submit_one(
+            # Generate this sprite's variants concurrently; GrokBuildClient's own
+            # semaphore bounds true parallelism to grok.max_concurrent.
+            results = await asyncio.gather(*(
+                self.submit_one(
                    sprite_id=sprite_id,
                    prompt=prompt,
                    negative=negative,
@ -386,6 +400,9 @@ class GrokSpriteGenerator:
                    priority=priority,
                    guidance_scale=guidance,
                )
+                for i in range(variants_per)
+            ))
+            for result in results:
                if result:
                    submitted += 1
                    if on_complete:
--- a/tools/sprite-generation/engine/ranker.py
+++ b/tools/sprite-generation/engine/ranker.py
@ -251,6 +251,14 @@ SPELL_GATES: tuple[str, ...] = (
 )
 SPELL_QUALITY: tuple[str, ...] = ("drama_impact", "magic_type_clarity", "color_vibrancy")

+# Map POIs (landmarks, lairs): a single isolated structure / natural formation /
+# camp seen from above. Unlike buildings these need NO roof and are often natural
+# (volcano, rock confluence, cave), so they must not be gated on building geometry.
+POI_GATES: tuple[str, ...] = (
+    "single_subject", "isolated_subject", "aerial_or_top_view", "no_text_watermark",
+)
+POI_QUALITY: tuple[str, ...] = ("subject_detail", "style_consistency", "readability_at_small_size")
+
 _NON_UNIT_GATE_DESCRIPTIONS: dict[str, str] = {
    "top_down_view": "Is the view perfectly top-down, looking straight down at ground like a satellite photo? Answer false if there's any horizon, sky, or perspective vanishing point.",
    "seamless_tileable": "Does this look like a seamless, tileable ground texture? Answer false if it has distinct borders, framing, or non-repeating composition.",
@ -274,6 +282,9 @@ _NON_UNIT_GATE_DESCRIPTIONS: dict[str, str] = {
    "recognizable_as_named": "Is the depicted object clearly recognizable as the named resource/improvement?",
    "magical_effect_visible": "Is there a visible magical energy effect? Answer false if it's just a dark void with no magic visible.",
    "dark_background": "Is the background dark/black? Answer false for bright or colorful backgrounds.",
+    "single_subject": "Is there exactly ONE structure, natural formation, or camp as the clear central subject? Answer false for multiple separate subjects, a village or cityscape, or a cluttered scene.",
+    "isolated_subject": "Is the subject isolated on a simple, plain background? Answer false if the image is a full-frame seamless ground texture with no distinct central subject.",
+    "aerial_or_top_view": "Is the subject viewed from above — a top-down or 3/4 aerial map view? Answer false for an eye-level shot with a horizon line, sky, or a straight-on front elevation.",
 }

 _NON_UNIT_QUALITY_DESCRIPTIONS: dict[str, str] = {
@ -286,6 +297,7 @@ _NON_UNIT_QUALITY_DESCRIPTIONS: dict[str, str] = {
    "icon_clarity": "How clear and readable is the icon? 90+ = instantly recognizable. 50 = vague blob.",
    "style_match": "Does it match the game's art direction? 90+ = perfect match. 50 = out of place.",
    "readability_at_64px": "Would this be readable at 64x64 pixels? 90+ = crisp and clear. 50 = loses all detail.",
+    "subject_detail": "How detailed and well-crafted is the structure or natural formation? 90+ = rich, characterful detail. 50 = generic shapeless blob.",
    "drama_impact": "How dramatic and impactful is the spell effect? 90+ = awe-inspiring. 50 = weak and unimpressive.",
    "magic_type_clarity": "Can you tell what type of magic this is? 90+ = instantly clear (fire, ice, death, etc). 50 = generic energy blob.",
    "color_vibrancy": "How vivid and magical are the colors? 90+ = stunning magical palette. 50 = dull and flat.",
@ -304,7 +316,7 @@ def _get_category_config(category: str) -> tuple[tuple[str, ...], tuple[str, ...
    if category == "buildings":
        return BUILDING_GATES, BUILDING_QUALITY, _NON_UNIT_GATE_DESCRIPTIONS, _NON_UNIT_QUALITY_DESCRIPTIONS
    if category in ("landmarks", "lairs"):
-        return BUILDING_GATES, BUILDING_QUALITY, _NON_UNIT_GATE_DESCRIPTIONS, _NON_UNIT_QUALITY_DESCRIPTIONS
+        return POI_GATES, POI_QUALITY, _NON_UNIT_GATE_DESCRIPTIONS, _NON_UNIT_QUALITY_DESCRIPTIONS
    if category in ("resources", "improvements"):
        return RESOURCE_GATES, RESOURCE_QUALITY, _NON_UNIT_GATE_DESCRIPTIONS, _NON_UNIT_QUALITY_DESCRIPTIONS
    if category == "spells":
--- a/tools/sprite-generation/test_grok_pipeline.py
+++ b/tools/sprite-generation/test_grok_pipeline.py
@ -0,0 +1,146 @@
+#!/usr/bin/env python3
+"""Headless unit tests for the Grok backend + starter pipeline pure functions.
+
+Run: python3 test_grok_pipeline.py   (no GPU, no grok CLI, no model_boss needed)
+"""
+from __future__ import annotations
+
+import io
+import sys
+from pathlib import Path
+
+TOOL_DIR = Path(__file__).resolve().parent
+sys.path.insert(0, str(TOOL_DIR))
+
+from engine.factory import BACKENDS, backend_summary, resolve_backend, with_backend
+from engine.grok_generator import _adapt_prompt_for_grok, _validate_png_bytes
+from engine.ranker import (
+    BUILDING_GATES,
+    POI_GATES,
+    POI_QUALITY,
+    _get_category_config,
+)
+from engine.starter import _unit_base_id, load_manifest, starter_sprite_ids
+
+_failures: list[str] = []
+
+
+def check(name: str, cond: bool, detail: str = "") -> None:
+    if cond:
+        print(f"  ok   {name}")
+    else:
+        print(f"  FAIL {name} {detail}")
+        _failures.append(name)
+
+
+def expect_raises(name: str, fn, exc: type[Exception]) -> None:
+    try:
+        fn()
+    except exc:
+        print(f"  ok   {name}")
+    except Exception as e:  # noqa: BLE001
+        print(f"  FAIL {name} (wrong exc: {type(e).__name__})")
+        _failures.append(name)
+    else:
+        print(f"  FAIL {name} (no exception)")
+        _failures.append(name)
+
+
+def test_factory() -> None:
+    print("factory:")
+    check("default backend", resolve_backend({}, None) == "model-boss")
+    check("config backend", resolve_backend({"backend": "grok"}, None) == "grok")
+    check("override wins", resolve_backend({"backend": "model-boss"}, "grok") == "grok")
+    expect_raises("unknown backend rejected", lambda: resolve_backend({}, "bogus"), ValueError)
+
+    original = {"backend": "model-boss", "model": "x"}
+    merged = with_backend(original, "grok")
+    check("with_backend resolves", merged["backend"] == "grok")
+    check("with_backend is a copy", original["backend"] == "model-boss")
+    check("grok summary label", "grok-build" in backend_summary({"backend": "grok"}))
+    check("backends tuple", set(BACKENDS) == {"model-boss", "grok"})
+
+
+def test_poi_gates() -> None:
+    print("ranker POI gates (landmarks/lairs):")
+    for cat in ("landmarks", "lairs"):
+        gates, quality, gate_desc, quality_desc = _get_category_config(cat)
+        check(f"{cat} -> POI_GATES", gates == POI_GATES)
+        check(f"{cat} -> POI_QUALITY", quality == POI_QUALITY)
+        check(f"{cat} drops roof_visible", "roof_visible" not in gates)
+        check(f"{cat} drops single_building", "single_building" not in gates)
+        check(f"{cat} has single_subject", "single_subject" in gates)
+        for g in gates:
+            check(f"{cat} gate '{g}' described", g in gate_desc)
+        for q in quality:
+            check(f"{cat} quality '{q}' described", q in quality_desc)
+
+    b_gates, _, _, _ = _get_category_config("buildings")
+    check("buildings unchanged", b_gates == BUILDING_GATES)
+    check("POI != BUILDING gates", POI_GATES != BUILDING_GATES)
+
+
+def test_adapt_prompt() -> None:
+    print("grok prompt adaptation:")
+    prompt = "game sprite, single character, dwarf warrior, dwarf warrior, plated"
+    negative = "anime, crowd, watermark"
+    out = _adapt_prompt_for_grok(prompt, negative)
+    check("dedupes repeated clause", out.lower().count("dwarf warrior") == 1)
+    check("adds facing boost for units", "Isometric strategy game unit sprite" in out)
+    check("injects negative avoid", "Avoid:" in out and "anime" in out)
+
+    non_unit = "isometric map landmark sprite, single volcano, painted"
+    out2 = _adapt_prompt_for_grok(non_unit, "")
+    check("no facing boost for non-units", "Back turned to camera" not in out2)
+
+
+def test_validate_png() -> None:
+    print("png validation:")
+    from PIL import Image
+
+    def _png(img: "Image.Image") -> bytes:
+        buf = io.BytesIO()
+        img.save(buf, format="PNG")
+        return buf.getvalue()
+
+    # Bright noise: large + high luminance -> valid.
+    import numpy as np
+
+    arr = (np.random.default_rng(7).integers(60, 255, (128, 128, 3))).astype("uint8")
+    valid = _png(Image.fromarray(arr, "RGB"))
+    _validate_png_bytes(valid)  # must not raise
+    print("  ok   accepts bright noisy image")
+
+    black = _png(Image.new("RGB", (128, 128), (0, 0, 0)))
+    expect_raises("rejects solid black", lambda: _validate_png_bytes(black), RuntimeError)
+    expect_raises("rejects tiny blob", lambda: _validate_png_bytes(b"\x89PNG\r\n" + b"\x00" * 64), RuntimeError)
+
+
+def test_starter() -> None:
+    print("starter manifest:")
+    check("strip _dwarves_m", _unit_base_id("spearmen_dwarves_m") == "spearmen")
+    check("strip _f", _unit_base_id("founder_dwarves_f") == "founder")
+    check("no suffix untouched", _unit_base_id("worker") == "worker")
+
+    manifest = load_manifest()
+    ids = starter_sprite_ids(manifest)
+    expected = sum(len(manifest.get(k, [])) for k in ("units", "buildings", "landmarks", "lairs"))
+    check("id count matches manifest", len(ids) == expected, f"{len(ids)} vs {expected}")
+    check("unit id prefixed", "units/spearmen_dwarves_m" in ids)
+    check("lair id prefixed", "lairs/goblin_camp" in ids)
+    check("landmark id prefixed", "landmarks/volcano" in ids)
+
+
+def main() -> int:
+    for t in (test_factory, test_poi_gates, test_adapt_prompt, test_validate_png, test_starter):
+        t()
+    print()
+    if _failures:
+        print(f"FAILED: {len(_failures)} -> {_failures}")
+        return 1
+    print("All Grok-pipeline unit tests passed.")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())