chore(sprite-generation): 🔧 Update scoring pipeline weights in sprite-generation YAML config
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
7f2c0e731d
commit
90f71f8c51
1 changed file with 22 additions and 8 deletions
|
|
@@ -5,11 +5,15 @@
|
|||
# Rejections at any stage go back to generation.
|
||||
#
|
||||
# Fields per stage:
|
||||
# name: Human-readable name (also used in DB scored_by)
|
||||
# backend: "model-boss" (local VLM) or "claude" (API)
|
||||
# model: Model ID for the backend
|
||||
# threshold: Minimum confidence (0-1) for quality scores to pass
|
||||
# purpose: What this stage is checking for
|
||||
# name: Human-readable name (also used in DB scored_by)
|
||||
# backend: "model-boss" (local VLM) or "claude" (API)
|
||||
# model: Model ID for the backend
|
||||
# threshold: Minimum confidence (0-1) for quality scores to pass
|
||||
# tiebreaker_range: If set, re-scores quality when confidence is within ±range of threshold,
|
||||
# then averages pass-2 and pass-3 scores. Omit or 0.0 to disable.
|
||||
# single_pass: If true, uses one combined gate+quality call instead of two-pass.
|
||||
# Best for expensive high-accuracy models. Default: false.
|
||||
# purpose: What this stage is checking for
|
||||
#
|
||||
# The pipeline stops escalating when `target_approved` variants have passed
|
||||
# ALL stages. Only the deficit is sent to the next stage.
|
||||
|
|
@@ -28,16 +32,26 @@ stages:
|
|||
backend: model-boss
|
||||
model: qwen3-vl-8b-instruct
|
||||
threshold: 0.40
|
||||
purpose: "Free local bulk filter — catches obvious gate failures (wrong direction, modern clothes, wrong background) before spending API dollars."
|
||||
tiebreaker_range: 0.12
|
||||
purpose: "Free local bulk filter — catches obvious gate failures (wrong direction, modern clothes, wrong background) before spending API dollars. Tiebreaker re-scores quality when confidence is borderline (±0.12 of threshold)."
|
||||
|
||||
- name: haiku
|
||||
backend: claude
|
||||
model: haiku
|
||||
threshold: 0.50
|
||||
purpose: "Cheap API filter — re-evaluates independently with stricter threshold. Only sees VLM-approved candidates."
|
||||
tiebreaker_range: 0.08
|
||||
purpose: "Cheap API filter — re-evaluates independently with stricter threshold. Only sees VLM-approved candidates. Tiebreaker re-scores quality when confidence is borderline (±0.08 of threshold)."
|
||||
|
||||
- name: sonnet
|
||||
backend: claude
|
||||
model: sonnet
|
||||
threshold: 0.58
|
||||
tiebreaker_range: 0.08
|
||||
purpose: "Mid-tier filter — stronger than Haiku, cheaper than Opus. Two-pass with tiebreaker on borderline quality scores."
|
||||
|
||||
- name: opus
|
||||
backend: claude
|
||||
model: opus
|
||||
threshold: 0.65
|
||||
purpose: "Final quality approval — highest accuracy, only sees pre-filtered candidates. Per-dimension floor (45) also enforced."
|
||||
single_pass: true
|
||||
purpose: "Final quality approval — single combined call (expensive). Highest accuracy, only sees pre-filtered candidates. Per-dimension floor (45) also enforced."
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue