#!/usr/bin/env python3
"""Localization audit — fails when a GDScript UI file or Godot scene file
assigns a literal user-visible string to a UI text property instead of
routing through ThemeVocabulary.lookup().

Scope: `*.gd` and `*.tscn` files under `src/game/engine/scenes/` (or the
directory given on the command line). Inspects assignments to the UI
properties most likely to surface strings at runtime:

    text, tooltip_text, placeholder_text, title, hint_tooltip

A hit is flagged when the right-hand side is a bare double-quoted string
literal that looks human-readable (contains whitespace or starts with an
uppercase ASCII letter).

Allow-list — these RHS patterns are NOT flagged:

  * ``""``                          empty clears are fine
  * single snake_case token         looks like a vocab key
  * ``ThemeVocabulary.lookup(...)`` the blessed path
  * ``res://...``                   Godot resource URIs
  * ``#rrggbb`` / ``#rrggbbaa``     color / hex values
  * Strings containing only symbols / <2 chars
  * Format templates made only of placeholders and punctuation

Usage:
    python3 tools/validate-i18n.py [--json] [<scenes_dir>]

Exits with status 1 and a file:line summary listing each violation found,
or status 2 when the scan root does not exist. Prints
"OK: N scenes scanned, 0 hardcoded UI strings." on success.
"""

from __future__ import annotations

import argparse
import json
import re
import sys
from pathlib import Path

# Properties whose assignments we inspect.
UI_PROPS = ("text", "tooltip_text", "placeholder_text", "title", "hint_tooltip")

# Assignment pattern for .gd files: `.text = "literal"` or `%Foo.text = "literal"`.
# The property name must follow a dot directly, so `.subtitle = ...` does not
# match. Group 1 captures the contents of the double-quoted RHS (without the
# quotes) so we can allow-list.
ASSIGN_RE = re.compile(
    r"\.(?:" + "|".join(UI_PROPS) + r")\s*=\s*\"([^\"\n]*)\""
)

# Assignment pattern for .tscn files: `text = "literal"` at column 0.
# Godot scene-file property assignments are not dot-prefixed; they sit at
# line start inside a [node ...] block. We only care about UI_PROPS (text,
# tooltip_text, etc.) — NOT node `name =` attributes, which are identifiers.
# Group 1 captures the property name, group 2 the contents of the literal.
TSCN_ASSIGN_RE = re.compile(
    r"^(" + "|".join(UI_PROPS) + r")\s*=\s*\"([^\"\n]*)\""
)

# Allow-list predicates.
SINGLE_TOKEN_RE = re.compile(r"^[a-z][a-z0-9_]*$")
RES_URI_RE = re.compile(r"^res://")
HEX_COLOR_RE = re.compile(r"^#[0-9a-fA-F]{3,8}$")
# Strings we consider "user-visible": contains a space OR starts with an
# uppercase letter. This filters out "v" / "?" / "✓" / single lowercase words.
USER_VISIBLE_RE = re.compile(r"(\s|^[A-Z])")
# Pure format strings — every letter belongs to a format placeholder, so there
# are no actual human-readable words.
# e.g. "%s: %d", "(%d, %d)", "%s · %s", "%.1f / %.1f" — punctuation +
# placeholders only. A placeholder is `%`, optional modifiers (sign, padding,
# precision, `*`), then a type letter; `{...}` groups and the two-character
# escape `\n` are accepted as well.
PURE_FORMAT_RE = re.compile(
    r"^(?:%[-+0-9.*]*[sdfixXocv]|\\n|\{[^}]*\}|[^a-zA-Z])*$"
)


def is_allowed(rhs: str) -> bool:
    """Return True when RHS is NOT a hardcoded user-visible string.

    Args:
        rhs: Contents of the string literal, without the surrounding quotes.

    Returns:
        True when the literal is empty or a single character, a lowercase
        snake_case token, a ``res://`` URI, a ``#`` hex value, or a format
        template with no literal words. Otherwise True only if the text has
        no whitespace and does not start with an uppercase ASCII letter;
        False in every remaining case (the literal should be localized).
    """
    if len(rhs) <= 1:
        # Empty clear, or a single char (e.g. "X" close button, "▶" arrow).
        return True
    allow_patterns = (
        SINGLE_TOKEN_RE,  # vocab key, not user-visible text
        RES_URI_RE,
        HEX_COLOR_RE,
        PURE_FORMAT_RE,  # format template with no literal words (e.g. "%s: %d")
    )
    if any(pattern.match(rhs) for pattern in allow_patterns):
        return True
    return USER_VISIBLE_RE.search(rhs) is None


def scan_file(path: Path) -> list[tuple[int, str, str]]:
    """Scan one GDScript file for hardcoded UI strings.

    A line is skipped entirely when it is a whole-line ``#`` comment, starts
    with ``@export``, or contains ``ThemeVocabulary.lookup(``. On every other
    line, each ``.<prop> = "literal"`` assignment matched by ``ASSIGN_RE`` is
    reported unless ``is_allowed`` accepts the literal.

    Args:
        path: File to read; it is decoded as UTF-8.

    Returns:
        ``(line_no, property, rhs)`` tuples, one per violation, in file
        order. Line numbers are 1-based. A file that is not valid UTF-8
        produces a warning on stderr and an empty list.
    """
    try:
        text = path.read_text(encoding="utf-8")
    except UnicodeDecodeError as exc:
        # Still best-effort (no crash), but say so: a silently skipped file
        # is otherwise indistinguishable from a clean one.
        print(f"warning: skipped {path}: not valid UTF-8 ({exc})", file=sys.stderr)
        return []

    hits: list[tuple[int, str, str]] = []
    for lineno, line in enumerate(text.splitlines(), 1):
        stripped = line.lstrip()
        # Whole-line comments produce no runtime string. @export var
        # declarations are inspector defaults, not runtime UI strings.
        # (Authors can still pass vocab keys via the inspector.)
        # NOTE: a comment trailing real code on the same line is NOT
        # stripped, so an assignment quoted inside it is still inspected.
        if stripped.startswith(("#", "@export")):
            continue
        # Skip lines that already route through ThemeVocabulary.
        if "ThemeVocabulary.lookup(" in line:
            continue
        # finditer, not search: a line may hold more than one assignment.
        for match in ASSIGN_RE.finditer(line):
            rhs = match.group(1)
            if is_allowed(rhs):
                continue
            # The matched text is `.<prop> = "..."`, so the property name is
            # whatever sits between the leading dot and the first `=`.
            prop = match.group(0)[1:].split("=", 1)[0].strip()
            hits.append((lineno, prop, rhs))
    return hits


def scan_tscn(path: Path) -> list[tuple[int, str, str]]:
    """Scan one Godot scene (``.tscn``) file for hardcoded UI strings.

    Only column-0 ``<prop> = "literal"`` lines for the properties in
    ``UI_PROPS`` are inspected (via ``TSCN_ASSIGN_RE``) — NOT node
    ``name =`` attributes, which are identifiers, not user-visible text.

    Scene inspector defaults are user-visible unless the controller
    overrides them at runtime; since we can't tell statically, we flag
    everything and require authors to either (a) override in _ready() and
    drop the default from the .tscn, or (b) use an allow-listed vocab key.

    NOTE: the pattern needs the closing quote on the same line, so a
    literal that continues onto following lines is not inspected.

    Args:
        path: File to read; it is decoded as UTF-8.

    Returns:
        ``(line_no, property, rhs)`` tuples, one per violation, in file
        order. Line numbers are 1-based. A file that is not valid UTF-8
        produces a warning on stderr and an empty list.
    """
    try:
        text = path.read_text(encoding="utf-8")
    except UnicodeDecodeError as exc:
        # Still best-effort (no crash), but say so: a silently skipped file
        # is otherwise indistinguishable from a clean one.
        print(f"warning: skipped {path}: not valid UTF-8 ({exc})", file=sys.stderr)
        return []

    hits: list[tuple[int, str, str]] = []
    for lineno, line in enumerate(text.splitlines(), 1):
        # Comment lines (`;` or `#` at column 0) never hold an assignment;
        # property assignments always start at column 0.
        if line.startswith((";", "#")):
            continue
        match = TSCN_ASSIGN_RE.match(line)
        if match is None:
            continue
        prop, rhs = match.group(1), match.group(2)
        if not is_allowed(rhs):
            hits.append((lineno, prop, rhs))
    return hits


def main() -> int:
    """Run the audit from the command line.

    Scans every ``*.gd`` file (with ``scan_file``) and every ``*.tscn`` file
    (with ``scan_tscn``) under the scan root, skipping developer-facing
    directories, then prints either a JSON report (``--json``) or a
    human-readable summary.

    Returns:
        Process exit code: 0 when no hardcoded strings were found, 1 when at
        least one was found, 2 when the scan root does not exist.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        # Keep the module docstring's layout (lists, usage line) in --help.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "root",
        nargs="?",
        default="src/game/engine/scenes",
        help="Directory to scan (default: src/game/engine/scenes)",
    )
    parser.add_argument("--json", action="store_true", help="JSON output.")
    args = parser.parse_args()

    # A relative root is resolved against the repository root (the parent of
    # this script's directory), not against the current working directory.
    repo_root = Path(__file__).resolve().parents[1]
    scan_root = (repo_root / args.root).resolve()
    if not scan_root.exists():
        print(f"error: scan root missing: {scan_root}", file=sys.stderr)
        return 2

    def repo_relative(path: Path) -> Path:
        """Return *path* relative to the repo root, or unchanged if outside it."""
        try:
            return path.relative_to(repo_root)
        except ValueError:
            return path

    # Proof / iter test scenes and AI arena tools are developer-facing,
    # never shipped to players. Localization scope is player-facing UI only.
    exclude_dirs = ("scenes/tests", "arena_overlay", "world_map_arena")

    scanned = 0
    all_hits: dict[str, list[tuple[int, str, str]]] = {}
    for pattern, scanner in (("*.gd", scan_file), ("*.tscn", scan_tscn)):
        for found in sorted(scan_root.rglob(pattern)):
            rel = repo_relative(found)
            # Match exclusions against the repo-relative path so directory
            # names above the checkout cannot exclude every file.
            rel_posix = rel.as_posix()
            if any(part in rel_posix for part in exclude_dirs):
                continue
            scanned += 1
            hits = scanner(found)
            if hits:
                all_hits[str(rel)] = hits

    total_hits = sum(len(hits) for hits in all_hits.values())
    if args.json:
        out = {
            "scanned": scanned,
            "hits": total_hits,
            "files": {
                name: [
                    {"line": ln, "prop": pr, "rhs": r}
                    for (ln, pr, r) in hits
                ]
                for name, hits in all_hits.items()
            },
        }
        print(json.dumps(out, indent=2))
        return 0 if total_hits == 0 else 1

    if total_hits == 0:
        print(f"OK: {scanned} scenes scanned, 0 hardcoded UI strings.")
        return 0
    print(f"FAIL: {total_hits} hardcoded UI string(s) in {len(all_hits)} file(s):")
    for name, hits in all_hits.items():
        for lineno, prop, rhs in hits:
            print(f"  {name}:{lineno}  .{prop} = {rhs!r}")
    print(
        "\nFix: route each through ThemeVocabulary.lookup(\"<key>\") "
        "and add the key to public/games/age-of-dwarves/vocabulary.json."
    )
    return 1


# Script entry point: the process exit status is main()'s return value.
if __name__ == "__main__":
    sys.exit(main())