diff --git a/tools/sprite-generation/engine/generator.py b/tools/sprite-generation/engine/generator.py index 0c7e6423..c83a7b1a 100644 --- a/tools/sprite-generation/engine/generator.py +++ b/tools/sprite-generation/engine/generator.py @@ -21,8 +21,8 @@ from engine.registry import SpriteRegistry logger = logging.getLogger(__name__) # Max concurrent generation requests awaiting results from model-boss. -# model-boss handles internal queuing per backend capacity (diffusion=1 at a time). -MAX_CONCURRENT = 8 +# Diffusion backend processes 1 image at a time — sending more just causes 503s. +MAX_CONCURRENT = 1 class SpriteGenerator: diff --git a/tools/sprite-generation/engine/ranker.py b/tools/sprite-generation/engine/ranker.py index b765d524..844b16b3 100644 --- a/tools/sprite-generation/engine/ranker.py +++ b/tools/sprite-generation/engine/ranker.py @@ -62,14 +62,17 @@ CATEGORY_CRITERIA: dict[str, str] = { "COMPOSITION CHECK: Must be a seamless, tileable ground texture. No distinct objects, no borders." ), "units": ( - "PERSPECTIVE CHECK: Must be a front-facing or slight 3/4 view of a single character, " - "like a Heroes of Might and Magic III or Civilization V unit sprite. " - "Straight-on or slightly elevated angle is CORRECT for character sprites. " - "Full body visible, centered, on a clean green chroma key background. " - "No background scenery, no terrain, no other characters.\n" - "COMPOSITION CHECK: ONE character only. Clean silhouette. " - "Must read clearly at 64x64 pixels. If there are multiple figures, a turnaround sheet, " - "or a busy background, score 0.1-0.2." + "This is a unit sprite for a fantasy 4X strategy game. Score each of the 10 dimensions precisely.\n" + "CAMERA: Slightly elevated 3/4 isometric, like Civ5/HoMM3 unit sprites.\n" + "FACING: Character MUST face BOTTOM-LEFT (southwest). Body oriented toward lower-left corner.\n" + "COMPOSITION: ONE character only. Clean silhouette. 
Must read clearly at 64x64.\n" + "SUBJECT: Must be the correct unit type with correct weapons/role.\n" + "RACE: Must match racial proportions (dwarves = short, stocky, broad — NOT tall humans).\n" + "GENDER: Male dwarves have thick braided beards. Female dwarves have NO beard, braided hair.\n" + "EQUIPMENT: Weapons, armor, gear must match unit type and era.\n" + "POSE: Full body visible, feet visible, dynamic action-ready stance.\n" + "BACKGROUND: Must be clean solid green chroma key. No scenery, terrain, gradients.\n" + "ART STYLE: Bold painterly fantasy like DOTA 2 / Warcraft III. Rich colors, strong shapes, stylized. NOT photorealistic, NOT anime, NOT pixel art." ), "buildings": ( "PERSPECTIVE CHECK: Must be 3/4 isometric view from above — like a Civilization V building icon. " @@ -113,14 +116,11 @@ This image was generated as a game sprite: {category_criteria} -Score on 4 dimensions (0.0 to 1.0). Be HARSH — most AI art scores 0.3-0.5. Only genuinely game-ready art gets 0.7+. +Score on the dimensions below (0.0 to 1.0). Be HARSH — most AI art scores 0.3-0.5. Only genuinely game-ready art gets 0.7+. -1. **perspective**: Is the viewing angle correct for this category? (see criteria above) -2. **composition**: Is it ONE thing, clean, properly framed? No collage, no clutter, no borders? -3. **subject_accuracy**: Does it actually depict {entity_id}? Is it recognizable as that thing? -4. **production_quality**: Would you ship this in a commercial strategy game? Sharp, stylistically consistent, no artifacts? 
+{score_instructions} -{{"perspective": 0.0, "composition": 0.0, "subject_accuracy": 0.0, "production_quality": 0.0}}""" +{score_template}""" def _ensure_sdk(): @@ -133,18 +133,57 @@ def _ensure_sdk(): SCORE_DIMENSIONS = ("perspective", "composition", "subject_accuracy", "production_quality") +# Units are scored on their own set of 10 dimensions, which includes facing direction +UNIT_SCORE_DIMENSIONS = ( + "camera_angle", + "facing_direction", + "composition", + "subject_type", + "race_accuracy", + "gender_accuracy", + "equipment_accuracy", + "pose_quality", + "background_compliance", + "art_style", +) + +_DIMENSION_DESCRIPTIONS: dict[str, str] = { + # Shared dimensions (used by non-unit categories too) + "perspective": "Is the viewing angle correct for this category? (see criteria above)", + "composition": "Is it ONE thing, clean, properly framed? No collage, no clutter, no borders?", + "subject_accuracy": "Does it actually depict the named entity? Is it recognizable as that thing?", + "production_quality": "Would you ship this in a commercial strategy game? Sharp, stylistically consistent, no artifacts?", + # Unit-specific dimensions (9 here; with the shared "composition" above, units score 10 in total) + "camera_angle": "Is it a slightly elevated 3/4 isometric view? Like Civ5/HoMM3 unit sprites. NOT top-down, NOT side-on, NOT first-person. Score 0.1-0.2 if wrong angle.", + "facing_direction": "Is the character facing BOTTOM-LEFT (southwest)? Body and head oriented toward lower-left corner. Score 0.1-0.2 if facing right, forward, or any other direction.", + "subject_type": "Is this the correct type of unit? A spearman holds a spear, cavalry is mounted, a siege engine is a machine. Score 0.1-0.2 if wrong unit type.", + "race_accuracy": "Does it match the race? Dwarves = short, stocky, broad. NOT tall/slender humans. Score 0.1-0.2 if proportions look human-sized or elven.", + "gender_accuracy": "Correct gender presentation? Male dwarves have thick braided beards. Female dwarves have NO beard, braided hair, sturdy feminine build. 
Score 0.1-0.2 if gender cues are wrong or ambiguous.", + "equipment_accuracy": "Correct weapons, armor, and gear for the unit and era? A musketeer has a musket, a founder has civilian tools, power armor looks futuristic. Score 0.1-0.2 if equipment doesn't match.", + "pose_quality": "Dynamic, readable pose? Full body visible, feet visible, action-ready stance. NOT a static T-pose, NOT cut off at knees. Score 0.1-0.2 if pose is stiff or cropped.", + "background_compliance": "Clean solid green chroma key background? No scenery, no terrain, no gradients, no other characters. Score 0.1-0.2 if background is not clean green.", + "art_style": "Does it have a bold, painterly fantasy game art style like DOTA 2 hero portraits or Warcraft III unit art? Rich colors, strong shapes, stylized proportions. NOT photorealistic, NOT anime, NOT pixel art, NOT generic AI slop. Score 0.7+ only if it looks like professional fantasy game concept art.", +} -def _parse_scores(raw: str) -> dict[str, float] | None: +def _build_score_instructions(dims: tuple[str, ...]) -> str: + lines = [] + for i, d in enumerate(dims, 1): + desc = _DIMENSION_DESCRIPTIONS.get(d, "") + lines.append(f"{i}. **{d}**: {desc}") + return "\n".join(lines) + + +def _parse_scores(raw: str, dimensions: tuple[str, ...] 
= SCORE_DIMENSIONS) -> dict[str, float] | None: """Parse reviewer's JSON response into dimension scores.""" _ensure_sdk() from claude_code_batch_sdk.parsing import parse_json_response data = parse_json_response(raw) if data is None: return None - if not all(k in data for k in SCORE_DIMENSIONS): + if not all(k in data for k in dimensions): return None - return {k: max(0.0, min(1.0, float(data[k]))) for k in SCORE_DIMENSIONS} + return {k: max(0.0, min(1.0, float(data[k]))) for k in dimensions} class SpriteRanker: @@ -170,12 +209,17 @@ class SpriteRanker: filename = Path(raw_path).name category = sprite["category"] criteria = CATEGORY_CRITERIA.get(category, "Evaluate overall quality and fitness for use as a game sprite.") + dims = UNIT_SCORE_DIMENSIONS if category == "units" else SCORE_DIMENSIONS + score_instructions = _build_score_instructions(dims) + score_template = json.dumps({d: 0.0 for d in dims}) prompt = RANKING_PROMPT_TEMPLATE.format( filename=filename, category=category, entity_id=sprite["entity_id"], prompt=sprite["prompt"][:200], category_criteria=criteria, + score_instructions=score_instructions, + score_template=score_template, ) raw = await self.client.generate( @@ -187,7 +231,7 @@ class SpriteRanker: if raw is None: return None - return _parse_scores(raw) + return _parse_scores(raw, dimensions=dims) async def rank_sprite(self, sprite_id: str) -> list[dict]: """Rank all completed variants for a sprite.