#!/usr/bin/env python3
"""Localization audit — fails when a GDScript UI file assigns a literal
user-visible string to a UI text property instead of routing through
ThemeVocabulary.lookup().

Scope: `src/game/engine/scenes/**/*.gd`. Inspects assignments to the
UI properties most likely to surface strings at runtime:

    text, tooltip_text, placeholder_text, title, hint_tooltip

A hit is flagged when the right-hand side is a bare string literal that
looks human-readable (capitalized multi-word, spaces, or punctuation).

Allow-list — these RHS patterns are NOT flagged:

  * ``""``                          empty clears are fine
  * single snake_case token         looks like a vocab key
  * ``ThemeVocabulary.lookup(...)`` the blessed path
  * ``res://...``                   Godot resource URIs
  * ``#rrggbb`` / ``#rrggbbaa``     color / hex values
  * format templates with no literal words (placeholders + punctuation)
  * Strings containing only symbols / <2 chars

Text after a ``#`` that sits outside a string literal is a comment and is
ignored, both for detection and for the ThemeVocabulary allow-list.

Usage:
    python3 tools/validate-i18n.py [--json] [root]

Exits non-zero with a file:line summary of every violation.
Prints "OK: N scenes scanned, 0 hardcoded UI strings." on success.
"""

from __future__ import annotations

import argparse
import json
import re
import sys
from pathlib import Path

# Properties whose assignments we inspect.
UI_PROPS = ("text", "tooltip_text", "placeholder_text", "title", "hint_tooltip")

# Assignment pattern: `.text = "literal"` or `%Foo.text = "literal"`.
# Group 1 captures the contents of the double-quoted RHS so we can allow-list.
ASSIGN_RE = re.compile(
    r"\.(?:" + "|".join(UI_PROPS) + r")\s*=\s*\"([^\"\n]*)\""
)

# Recovers the property name from the text matched by ASSIGN_RE. Compiled
# once here instead of on every hit.
_PROP_RE = re.compile(r"\.(" + "|".join(UI_PROPS) + r")\s*=")

# Allow-list predicates.
SINGLE_TOKEN_RE = re.compile(r"^[a-z][a-z0-9_]*$")
RES_URI_RE = re.compile(r"^res://")
HEX_COLOR_RE = re.compile(r"^#[0-9a-fA-F]{3,8}$")

# Strings we consider "user-visible": contains a space OR starts with an
# uppercase letter. This filters out "v" / "?" / "✓" / single lowercase words.
USER_VISIBLE_RE = re.compile(r"(\s|^[A-Z])")

# One printf-style placeholder: optional sign/pad-right flags, optional
# padding (digits or `*`), optional `.precision`, then a conversion letter.
# Covers e.g. "%s", "%05d", "%.2f", "%-8s", "%+d".
_FORMAT_SPEC = r"%[-+]*(?:\*|[0-9]+)?(?:\.(?:\*|[0-9]+)?)?[sdfixXocv]"

# Pure format strings — every ASCII letter belongs to a placeholder
# (printf-style spec, a literal backslash-n escape, or a `{...}` field).
# No actual human-readable words.
# e.g. "%s: %d", "(%d, %d)", "%s · %s", "%.1f / %.1f".
#
# The alternatives are mutually exclusive on `{`: a brace is either the start
# of a closed `{...}` field or (negative lookahead) a stray brace with no
# closing partner. That keeps matching linear; an ambiguous pattern would
# backtrack exponentially on input such as "{}{}{}...x".
PURE_FORMAT_RE = re.compile(
    r"^(?:"
    + _FORMAT_SPEC
    + r"|\\n"
    + r"|\{[^}]*\}"
    + r"|\{(?![^}]*\})"
    + r"|[^a-zA-Z{]"
    + r")*$"
)

# Proof / iter test scenes and AI arena tools are developer-facing,
# never shipped to players. Localization scope is player-facing UI only.
EXCLUDE_DIRS = ("scenes/tests", "arena_overlay", "world_map_arena")


def is_allowed(rhs: str) -> bool:
    """Return True when RHS is NOT a hardcoded user-visible string.

    ``rhs`` is the text between the double quotes of an assignment, exactly
    as it appears in the GDScript source (escape sequences are not decoded).
    """
    if rhs == "":
        return True
    if len(rhs) <= 1:
        return True  # single char (e.g. "X" close button, "▶" arrow)
    if SINGLE_TOKEN_RE.match(rhs):
        return True  # vocab key, not user-visible text
    if RES_URI_RE.match(rhs):
        return True
    if HEX_COLOR_RE.match(rhs):
        return True
    if PURE_FORMAT_RE.match(rhs):
        return True  # format template with no literal words (e.g. "%s: %d")
    if not USER_VISIBLE_RE.search(rhs):
        return True
    return False


def _strip_comment(line: str) -> str:
    """Return ``line`` without its trailing ``#`` comment, if it has one.

    A ``#`` inside a single- or double-quoted string (e.g. ``"#ff0000"``) is
    kept; a backslash inside a string escapes the following character.
    """
    quote = ""
    i = 0
    while i < len(line):
        ch = line[i]
        if quote:
            if ch == "\\":
                i += 2  # skip the escaped character, it cannot close the string
                continue
            if ch == quote:
                quote = ""
        elif ch in "\"'":
            quote = ch
        elif ch == "#":
            return line[:i]
        i += 1
    return line


def _repo_relative(path: Path, repo_root: Path) -> Path:
    """Return ``path`` relative to ``repo_root``, or unchanged when outside it."""
    try:
        return path.relative_to(repo_root)
    except ValueError:
        # Scan root was given as an absolute / `..` path outside the repo.
        return path


def scan_file(path: Path) -> list[tuple[int, str, str]]:
    """Return (line_no, property, rhs) tuples for each violation.

    Line numbers are 1-based. A file that is not valid UTF-8 yields no
    violations; a warning naming the file is written to stderr so the skip
    is visible.
    """
    try:
        text = path.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        print(f"warning: skipping non-UTF-8 file: {path}", file=sys.stderr)
        return []

    hits: list[tuple[int, str, str]] = []
    for lineno, raw_line in enumerate(text.splitlines(), 1):
        # Drop comments first: commented-out code doesn't produce a runtime
        # string, and a mention of ThemeVocabulary inside a comment must not
        # allow-list the live code in front of it.
        code = _strip_comment(raw_line)
        # Skip lines that already route through ThemeVocabulary.
        if "ThemeVocabulary.lookup(" in code:
            continue
        # Skip @export var declarations — inspector defaults, not runtime
        # UI strings. (Authors can still pass vocab keys via the inspector.)
        if code.lstrip().startswith("@export"):
            continue
        # finditer so that every assignment on the line is checked, not only
        # the first one.
        for match in ASSIGN_RE.finditer(code):
            rhs = match.group(1)
            if is_allowed(rhs):
                continue
            prop_match = _PROP_RE.match(match.group(0))
            prop = prop_match.group(1) if prop_match else "?"
            hits.append((lineno, prop, rhs))
    return hits


def main() -> int:
    """Run the audit from the command line.

    Returns the process exit code: 0 when no violations were found, 1 when
    at least one was found, 2 when the scan root does not exist.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "root",
        nargs="?",
        default="src/game/engine/scenes",
        help="Directory to scan (default: src/game/engine/scenes)",
    )
    parser.add_argument("--json", action="store_true", help="JSON output.")
    args = parser.parse_args()

    repo_root = Path(__file__).resolve().parents[1]
    scan_root = (repo_root / args.root).resolve()
    if not scan_root.exists():
        print(f"error: scan root missing: {scan_root}", file=sys.stderr)
        return 2

    files: list[Path] = []
    for candidate in sorted(scan_root.rglob("*.gd")):
        # Match exclusions against the repo-relative path so the name of the
        # directory the repo is checked out into can never exclude files.
        rel_posix = _repo_relative(candidate, repo_root).as_posix()
        if not any(part in rel_posix for part in EXCLUDE_DIRS):
            files.append(candidate)

    all_hits: dict[str, list[tuple[int, str, str]]] = {}
    for f in files:
        hits = scan_file(f)
        if hits:
            all_hits[str(_repo_relative(f, repo_root))] = hits

    total_hits = sum(len(h) for h in all_hits.values())

    if args.json:
        out = {
            "scanned": len(files),
            "hits": total_hits,
            "files": {
                p: [{"line": ln, "prop": pr, "rhs": r} for (ln, pr, r) in hs]
                for p, hs in all_hits.items()
            },
        }
        print(json.dumps(out, indent=2))
        return 0 if total_hits == 0 else 1

    if total_hits == 0:
        print(f"OK: {len(files)} scenes scanned, 0 hardcoded UI strings.")
        return 0

    print(f"FAIL: {total_hits} hardcoded UI string(s) in {len(all_hits)} file(s):")
    for path, hits in all_hits.items():
        for lineno, prop, rhs in hits:
            print(f"  {path}:{lineno}  .{prop} = {rhs!r}")
    print(
        "\nFix: route each through ThemeVocabulary.lookup(\"\") "
        "and add the key to public/games/age-of-dwarves/vocabulary.json."
    )
    return 1


if __name__ == "__main__":
    sys.exit(main())