fix(sprite-generation): 🐛 POI gates for landmarks/lairs + Grok backend hardening

- ranker: dedicated POI_GATES/POI_QUALITY for landmarks & lairs — they were
  routed through BUILDING_GATES (roof_visible/single_building/no_front_facade),
  so volcanoes, ley-confluence formations and lair camps failed every gate and
  burned generations to the regen cap.
- grok_generator: _ensure_grok_sdk re-checks the import per candidate path and
  raises a clear 'set GROK_BUILD_SDK_PATH' error instead of a cryptic ImportError.
- grok_generator: submit_batch generates a sprite's variants concurrently via
  asyncio.gather so the client's max_concurrent semaphore is actually used.
- add test_grok_pipeline.py — 49 headless checks (factory, POI-gate routing
  regression, prompt adaptation, PNG validation, starter manifest).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Natalie 2026-06-23 18:48:23 -04:00
parent b13508ed04
commit 33edc39863
3 changed files with 183 additions and 8 deletions

View file

@ -40,21 +40,33 @@ _NEGATIVE_PRIORITY = (
def _ensure_grok_sdk() -> None:
    """Make grok-build-batch-sdk importable, or fail with a clear message.

    A plain import is tried first. If that fails, each candidate directory
    that exists is placed at the front of ``sys.path`` and the import is
    retried: the ``GROK_BUILD_SDK_PATH`` override first, then the in-repo
    checkout located relative to this file. A directory that was added here
    but did not yield a working import is removed from ``sys.path`` again.

    Raises:
        RuntimeError: if the SDK cannot be imported from any candidate. The
            last ImportError, if any, is attached as the cause.
    """
    try:
        import grok_build_batch_sdk  # noqa: F401
        return
    except ImportError:
        pass

    env_override = os.environ.get("GROK_BUILD_SDK_PATH")
    try:
        repo_sdk = (
            Path(__file__).resolve().parents[5]
            / "@applications/@ml/@packages/@py/grok-build-batch-sdk/src"
        )
    except IndexError:
        # This file sits fewer than six directories deep, so there is no
        # in-repo checkout to look for; fall through to the clear error below.
        repo_sdk = None
    candidates = [
        Path(env_override) if env_override else None,
        repo_sdk,
    ]

    last_error = None
    for sdk_path in candidates:
        if not (sdk_path and sdk_path.exists()):
            continue
        entry = str(sdk_path)
        inserted = entry not in sys.path
        if inserted:
            sys.path.insert(0, entry)
        try:
            import grok_build_batch_sdk  # noqa: F401
            return
        except ImportError as err:
            last_error = err
            # Do not leave a directory that did not help on sys.path.
            if inserted and entry in sys.path:
                sys.path.remove(entry)

    searched = [str(c) for c in candidates if c]
    raise RuntimeError(
        "grok-build-batch-sdk not found. Install it as a package or set "
        f"GROK_BUILD_SDK_PATH to its src/ directory. Searched: {searched}"
    ) from last_error
def _adapt_prompt_for_grok(prompt: str, negative: str, *, max_negative_terms: int = 12) -> str:
@ -374,8 +386,10 @@ class GrokSpriteGenerator:
else:
guidance = self.defaults.get("guidance_scale", 7.5)
for i in range(variants_per):
result = await self.submit_one(
# Generate this sprite's variants concurrently; GrokBuildClient's own
# semaphore bounds true parallelism to grok.max_concurrent.
results = await asyncio.gather(*(
self.submit_one(
sprite_id=sprite_id,
prompt=prompt,
negative=negative,
@ -386,6 +400,9 @@ class GrokSpriteGenerator:
priority=priority,
guidance_scale=guidance,
)
for i in range(variants_per)
))
for result in results:
if result:
submitted += 1
if on_complete:

View file

@ -251,6 +251,14 @@ SPELL_GATES: tuple[str, ...] = (
)
SPELL_QUALITY: tuple[str, ...] = ("drama_impact", "magic_type_clarity", "color_vibrancy")
# Map POIs (landmarks, lairs): a single isolated structure / natural formation /
# camp seen from above. Unlike buildings these need NO roof and are often natural
# (volcano, rock confluence, cave), so they must not be gated on building geometry.
POI_GATES: tuple[str, ...] = (
    "single_subject", "isolated_subject", "aerial_or_top_view", "no_text_watermark",
)
# Quality criteria returned together with POI_GATES for landmarks and lairs.
POI_QUALITY: tuple[str, ...] = ("subject_detail", "style_consistency", "readability_at_small_size")
_NON_UNIT_GATE_DESCRIPTIONS: dict[str, str] = {
"top_down_view": "Is the view perfectly top-down, looking straight down at ground like a satellite photo? Answer false if there's any horizon, sky, or perspective vanishing point.",
"seamless_tileable": "Does this look like a seamless, tileable ground texture? Answer false if it has distinct borders, framing, or non-repeating composition.",
@ -274,6 +282,9 @@ _NON_UNIT_GATE_DESCRIPTIONS: dict[str, str] = {
"recognizable_as_named": "Is the depicted object clearly recognizable as the named resource/improvement?",
"magical_effect_visible": "Is there a visible magical energy effect? Answer false if it's just a dark void with no magic visible.",
"dark_background": "Is the background dark/black? Answer false for bright or colorful backgrounds.",
"single_subject": "Is there exactly ONE structure, natural formation, or camp as the clear central subject? Answer false for multiple separate subjects, a village or cityscape, or a cluttered scene.",
"isolated_subject": "Is the subject isolated on a simple, plain background? Answer false if the image is a full-frame seamless ground texture with no distinct central subject.",
"aerial_or_top_view": "Is the subject viewed from above — a top-down or 3/4 aerial map view? Answer false for an eye-level shot with a horizon line, sky, or a straight-on front elevation.",
}
_NON_UNIT_QUALITY_DESCRIPTIONS: dict[str, str] = {
@ -286,6 +297,7 @@ _NON_UNIT_QUALITY_DESCRIPTIONS: dict[str, str] = {
"icon_clarity": "How clear and readable is the icon? 90+ = instantly recognizable. 50 = vague blob.",
"style_match": "Does it match the game's art direction? 90+ = perfect match. 50 = out of place.",
"readability_at_64px": "Would this be readable at 64x64 pixels? 90+ = crisp and clear. 50 = loses all detail.",
"subject_detail": "How detailed and well-crafted is the structure or natural formation? 90+ = rich, characterful detail. 50 = generic shapeless blob.",
"drama_impact": "How dramatic and impactful is the spell effect? 90+ = awe-inspiring. 50 = weak and unimpressive.",
"magic_type_clarity": "Can you tell what type of magic this is? 90+ = instantly clear (fire, ice, death, etc). 50 = generic energy blob.",
"color_vibrancy": "How vivid and magical are the colors? 90+ = stunning magical palette. 50 = dull and flat.",
@ -304,7 +316,7 @@ def _get_category_config(category: str) -> tuple[tuple[str, ...], tuple[str, ...
if category == "buildings":
return BUILDING_GATES, BUILDING_QUALITY, _NON_UNIT_GATE_DESCRIPTIONS, _NON_UNIT_QUALITY_DESCRIPTIONS
if category in ("landmarks", "lairs"):
return BUILDING_GATES, BUILDING_QUALITY, _NON_UNIT_GATE_DESCRIPTIONS, _NON_UNIT_QUALITY_DESCRIPTIONS
return POI_GATES, POI_QUALITY, _NON_UNIT_GATE_DESCRIPTIONS, _NON_UNIT_QUALITY_DESCRIPTIONS
if category in ("resources", "improvements"):
return RESOURCE_GATES, RESOURCE_QUALITY, _NON_UNIT_GATE_DESCRIPTIONS, _NON_UNIT_QUALITY_DESCRIPTIONS
if category == "spells":

View file

@ -0,0 +1,146 @@
#!/usr/bin/env python3
"""Headless unit tests for the Grok backend + starter pipeline pure functions.
Run: python3 test_grok_pipeline.py (no GPU, no grok CLI, no model_boss needed)
"""
from __future__ import annotations
import io
import sys
from pathlib import Path
# Put this script's own directory first on sys.path so the `engine.*` imports
# below resolve when the file is run directly (assumes engine/ sits beside it —
# TODO confirm).
TOOL_DIR = Path(__file__).resolve().parent
sys.path.insert(0, str(TOOL_DIR))
from engine.factory import BACKENDS, backend_summary, resolve_backend, with_backend
from engine.grok_generator import _adapt_prompt_for_grok, _validate_png_bytes
from engine.ranker import (
BUILDING_GATES,
POI_GATES,
POI_QUALITY,
_get_category_config,
)
from engine.starter import _unit_base_id, load_manifest, starter_sprite_ids
# Names of the checks that failed so far; main() reports them and uses them to
# choose the process exit code.
_failures: list[str] = []
def check(name: str, cond: bool, detail: str = "") -> None:
    """Report one named check and remember it if it failed.

    Prints an ``ok`` line when *cond* is truthy. Otherwise prints a ``FAIL``
    line followed by *detail* and appends *name* to the module-level
    ``_failures`` list.
    """
    if not cond:
        print(f" FAIL {name} {detail}")
        _failures.append(name)
        return
    print(f" ok {name}")
def expect_raises(name: str, fn, exc: type[Exception]) -> None:
    """Call *fn* with no arguments and require that it raises *exc*.

    Prints an ``ok`` line when *exc* is raised. If a different ``Exception``
    is raised, or nothing is raised at all, prints a ``FAIL`` line describing
    which and appends *name* to the module-level ``_failures`` list.
    """
    problem = None
    try:
        fn()
        problem = "(no exception)"
    except exc:
        pass
    except Exception as err:  # noqa: BLE001
        problem = f"(wrong exc: {type(err).__name__})"

    if problem is None:
        print(f" ok {name}")
        return
    print(f" FAIL {name} {problem}")
    _failures.append(name)
def test_factory() -> None:
    """Check backend resolution, the with_backend copy semantics and the summary label."""
    print("factory:")

    default_name = resolve_backend({}, None)
    check("default backend", default_name == "model-boss")

    from_config = resolve_backend({"backend": "grok"}, None)
    check("config backend", from_config == "grok")

    overridden = resolve_backend({"backend": "model-boss"}, "grok")
    check("override wins", overridden == "grok")

    def _resolve_bogus():
        return resolve_backend({}, "bogus")

    expect_raises("unknown backend rejected", _resolve_bogus, ValueError)

    base_cfg = {"backend": "model-boss", "model": "x"}
    switched = with_backend(base_cfg, "grok")
    check("with_backend resolves", switched["backend"] == "grok")
    # The input dict must still carry its own backend after the call.
    check("with_backend is a copy", base_cfg["backend"] == "model-boss")

    summary = backend_summary({"backend": "grok"})
    check("grok summary label", "grok-build" in summary)

    known_backends = set(BACKENDS)
    check("backends tuple", known_backends == {"model-boss", "grok"})
def test_poi_gates() -> None:
    """Regression check: landmarks and lairs use the POI gates, not the building gates."""
    print("ranker POI gates (landmarks/lairs):")
    for category in ("landmarks", "lairs"):
        gates, quality, gate_desc, quality_desc = _get_category_config(category)
        check(f"{category} -> POI_GATES", gates == POI_GATES)
        check(f"{category} -> POI_QUALITY", quality == POI_QUALITY)
        # Building-only gates must no longer apply to these categories.
        for dropped in ("roof_visible", "single_building"):
            check(f"{category} drops {dropped}", dropped not in gates)
        check(f"{category} has single_subject", "single_subject" in gates)
        # Every gate and quality criterion needs a description entry.
        for gate_name in gates:
            check(f"{category} gate '{gate_name}' described", gate_name in gate_desc)
        for quality_name in quality:
            check(f"{category} quality '{quality_name}' described", quality_name in quality_desc)

    building_gates, _, _, _ = _get_category_config("buildings")
    check("buildings unchanged", building_gates == BUILDING_GATES)
    check("POI != BUILDING gates", POI_GATES != BUILDING_GATES)
def test_adapt_prompt() -> None:
    """Check the Grok prompt adapter on one unit prompt and one non-unit prompt."""
    print("grok prompt adaptation:")

    unit_prompt = "game sprite, single character, dwarf warrior, dwarf warrior, plated"
    unit_negative = "anime, crowd, watermark"
    adapted_unit = _adapt_prompt_for_grok(unit_prompt, unit_negative)
    occurrences = adapted_unit.lower().count("dwarf warrior")
    check("dedupes repeated clause", occurrences == 1)
    check("adds facing boost for units", "Isometric strategy game unit sprite" in adapted_unit)
    has_avoid = "Avoid:" in adapted_unit and "anime" in adapted_unit
    check("injects negative avoid", has_avoid)

    landmark_prompt = "isometric map landmark sprite, single volcano, painted"
    adapted_landmark = _adapt_prompt_for_grok(landmark_prompt, "")
    check("no facing boost for non-units", "Back turned to camera" not in adapted_landmark)
def test_validate_png() -> None:
    """Check that PNG validation accepts a bright noisy image and rejects bad input.

    The acceptance case is recorded through ``check`` like every other case,
    so an unexpected exception from ``_validate_png_bytes`` is counted as a
    failure and does not abort the remaining tests.
    """
    print("png validation:")
    # Imported here so the rest of the suite does not need PIL/numpy at import time.
    from PIL import Image
    import numpy as np

    def _png(img: "Image.Image") -> bytes:
        """Encode *img* as PNG and return the raw bytes."""
        buf = io.BytesIO()
        img.save(buf, format="PNG")
        return buf.getvalue()

    # Bright noise: large + high luminance -> valid. Fixed seed keeps it deterministic.
    arr = (np.random.default_rng(7).integers(60, 255, (128, 128, 3))).astype("uint8")
    valid = _png(Image.fromarray(arr, "RGB"))
    try:
        _validate_png_bytes(valid)  # must not raise
    except Exception as e:  # noqa: BLE001
        check("accepts bright noisy image", False, f"(raised {type(e).__name__}: {e})")
    else:
        check("accepts bright noisy image", True)

    black = _png(Image.new("RGB", (128, 128), (0, 0, 0)))
    expect_raises("rejects solid black", lambda: _validate_png_bytes(black), RuntimeError)
    expect_raises("rejects tiny blob", lambda: _validate_png_bytes(b"\x89PNG\r\n" + b"\x00" * 64), RuntimeError)
def test_starter() -> None:
    """Check unit base-id stripping and the sprite ids derived from the starter manifest."""
    print("starter manifest:")
    check("strip _dwarves_m", _unit_base_id("spearmen_dwarves_m") == "spearmen")
    check("strip _f", _unit_base_id("founder_dwarves_f") == "founder")
    check("no suffix untouched", _unit_base_id("worker") == "worker")

    manifest = load_manifest()
    ids = starter_sprite_ids(manifest)

    # Every entry of these four manifest sections should yield exactly one id.
    expected = 0
    for section in ("units", "buildings", "landmarks", "lairs"):
        expected += len(manifest.get(section, []))
    check("id count matches manifest", len(ids) == expected, f"{len(ids)} vs {expected}")

    check("unit id prefixed", "units/spearmen_dwarves_m" in ids)
    check("lair id prefixed", "lairs/goblin_camp" in ids)
    check("landmark id prefixed", "landmarks/volcano" in ids)
def main() -> int:
    """Run every test group in order and return the process exit code.

    Returns:
        0 when no check was recorded in ``_failures``, 1 otherwise.
    """
    suites = (test_factory, test_poi_gates, test_adapt_prompt, test_validate_png, test_starter)
    for suite in suites:
        suite()
        print()  # blank line between groups

    if not _failures:
        print("All Grok-pipeline unit tests passed.")
        return 0
    print(f"FAILED: {len(_failures)} -> {_failures}")
    return 1
# Script entry point: exit with main()'s return code.
if __name__ == "__main__":
    raise SystemExit(main())