magicciv/tools/autoplay-validate.py
Natalie 9f2f8f2eae feat(@projects/@magic-civilization): add autoplay validation and reporting tools
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-04-16 17:51:23 -07:00

258 lines
8.3 KiB
Python
Executable file

#!/usr/bin/env python3
"""
JSON Schema validator for autoplay output files.
Implements the subset of draft-07 used by the schemas:
type, required, additionalProperties, properties, propertyNames.pattern,
minimum, enum, items, pattern, $ref (local only).
stdlib only — no pip installs.
Usage:
# Validate a single JSON file against a named schema:
python3 tools/autoplay-validate.py --schema meta path/to/meta.json
# Validate every line of a JSONL file independently:
python3 tools/autoplay-validate.py --schema turn-stats-line --jsonl path/to/turn_stats.jsonl
# Legacy: validate against the flat result schema (default):
python3 tools/autoplay-validate.py path/to/result.json
Exits 0 if all valid, 1 with errors to stderr, 2 on usage error.
Available schema names (--schema):
turn-stats-line tools/schemas/autoplay/turn-stats-line.json
meta tools/schemas/autoplay/meta.json
events-line tools/schemas/autoplay/events-line.json
save tools/schemas/autoplay/save.json
result tools/autoplay-result-schema.json (legacy flat schema)
"""
from __future__ import annotations
import json
import re
import sys
from pathlib import Path
from typing import Any
TOOLS_DIR = Path(__file__).parent
SCHEMAS_DIR = TOOLS_DIR / "schemas" / "autoplay"
SCHEMA_PATHS: dict[str, Path] = {
"result": TOOLS_DIR / "autoplay-result-schema.json",
"turn-stats-line": SCHEMAS_DIR / "turn-stats-line.json",
"meta": SCHEMAS_DIR / "meta.json",
"events-line": SCHEMAS_DIR / "events-line.json",
"save": SCHEMAS_DIR / "save.json",
}
_DEFAULT_SCHEMA = "result"
def load_schema(name: str = _DEFAULT_SCHEMA) -> dict[str, Any]:
    """Load and parse the JSON schema registered under *name*.

    Args:
        name: A key of ``SCHEMA_PATHS``. Defaults to ``_DEFAULT_SCHEMA``.

    Returns:
        The parsed schema document.

    Raises:
        ValueError: If *name* is not a registered schema name, or (as
            ``json.JSONDecodeError``, a ``ValueError`` subclass) if the
            schema file does not contain valid JSON.
        OSError: If the schema file cannot be opened.
    """
    path = SCHEMA_PATHS.get(name)
    if path is None:
        raise ValueError(
            f"unknown schema {name!r}. Available: {', '.join(sorted(SCHEMA_PATHS))}"
        )
    # Read as UTF-8 explicitly: the platform default encoding is
    # locale-dependent and can mis-decode non-ASCII schema content.
    with path.open(encoding="utf-8") as f:
        return json.load(f)
_TYPE_CHECKS: dict[str, type | tuple[type, ...]] = {
"object": dict,
"array": list,
"string": str,
"integer": int,
"number": (int, float),
"boolean": bool,
"null": type(None),
}
def _resolve_ref(ref: str, root: dict[str, Any]) -> dict[str, Any]:
if not ref.startswith("#/"):
raise ValueError(f"only local refs supported, got {ref!r}")
node: Any = root
for part in ref[2:].split("/"):
if not isinstance(node, dict) or part not in node:
raise ValueError(f"ref {ref!r} does not resolve")
node = node[part]
return node
def _validate(
value: Any, schema: dict[str, Any], root: dict[str, Any], path: str
) -> list[str]:
errors: list[str] = []
if "$ref" in schema:
schema = _resolve_ref(schema["$ref"], root)
t = schema.get("type")
if t is not None:
expected = _TYPE_CHECKS.get(t)
if expected is None:
errors.append(f"{path}: unknown schema type {t!r}")
return errors
# bool is a subclass of int in Python; reject booleans as numbers.
if t in ("integer", "number") and isinstance(value, bool):
errors.append(f"{path}: expected {t}, got boolean")
return errors
if t == "integer" and isinstance(value, float) and not value.is_integer():
errors.append(f"{path}: expected integer, got float {value}")
return errors
if not isinstance(value, expected):
errors.append(f"{path}: expected {t}, got {type(value).__name__}")
return errors
if "enum" in schema:
if value not in schema["enum"]:
errors.append(f"{path}: {value!r} not in enum {schema['enum']}")
if "minimum" in schema and isinstance(value, (int, float)):
if value < schema["minimum"]:
errors.append(f"{path}: {value} < minimum {schema['minimum']}")
if "pattern" in schema and isinstance(value, str):
if not re.match(schema["pattern"], value):
errors.append(
f"{path}: {value!r} does not match pattern {schema['pattern']!r}"
)
if t == "object" and isinstance(value, dict):
props: dict[str, Any] = schema.get("properties", {})
required: list[str] = schema.get("required", [])
additional: bool | dict[str, Any] = schema.get("additionalProperties", True)
prop_names: dict[str, Any] | None = schema.get("propertyNames")
for req in required:
if req not in value:
errors.append(f"{path}: missing required property {req!r}")
for k, v in value.items():
kpath = f"{path}.{k}"
if prop_names is not None:
errors.extend(_validate(k, prop_names, root, f"{kpath}<key>"))
if k in props:
errors.extend(_validate(v, props[k], root, kpath))
elif additional is False:
errors.append(f"{path}: unexpected property {k!r}")
elif isinstance(additional, dict):
errors.extend(_validate(v, additional, root, kpath))
if t == "array" and isinstance(value, list):
item_schema = schema.get("items")
if item_schema is not None:
for i, item in enumerate(value):
errors.extend(_validate(item, item_schema, root, f"{path}[{i}]"))
return errors
def validate(data: Any, schema: dict[str, Any] | None = None) -> list[str]:
    """Check *data* against *schema* and return the problems found.

    When *schema* is omitted, the default schema is obtained from
    ``load_schema()``. An empty list means *data* is valid.
    """
    if schema is None:
        schema = load_schema()
    # The schema is also passed as the root document for "$ref" resolution,
    # and "$" is the path label of the top-level value.
    return _validate(data, schema, schema, "$")
def _validate_file(path: Path, schema: dict[str, Any], jsonl: bool) -> int:
"""Validate one file. Returns error count."""
total_errors = 0
try:
text = path.read_text()
except OSError as e:
print(f"{path}: cannot read ({e})", file=sys.stderr)
return 1
if jsonl:
for lineno, raw in enumerate(text.splitlines(), start=1):
raw = raw.strip()
if not raw:
continue
try:
data = json.loads(raw)
except json.JSONDecodeError as e:
print(f"{path}:{lineno}: invalid JSON ({e})", file=sys.stderr)
total_errors += 1
continue
errs = validate(data, schema)
if errs:
total_errors += len(errs)
print(f"{path}:{lineno}: {len(errs)} error(s)", file=sys.stderr)
for e in errs:
print(f" {e}", file=sys.stderr)
else:
try:
data = json.loads(text)
except json.JSONDecodeError as e:
print(f"{path}: invalid JSON ({e})", file=sys.stderr)
return 1
errs = validate(data, schema)
if errs:
total_errors += len(errs)
print(f"{path}: {len(errs)} error(s)", file=sys.stderr)
for e in errs:
print(f" {e}", file=sys.stderr)
else:
print(f"{path}: OK", file=sys.stderr)
if jsonl and total_errors == 0:
print(f"{path}: OK", file=sys.stderr)
return total_errors
def _main(argv: list[str]) -> int:
    """Command-line entry point.

    Recognises ``--schema NAME``, ``--schema=NAME`` and ``--jsonl``; every
    other argument not starting with ``-`` is treated as a file to validate.

    Args:
        argv: Full argument vector; ``argv[0]`` (the program name) is skipped.

    Returns:
        Process exit code: 0 if every file validated cleanly, 1 if any file
        had errors, 2 on a usage error or when the schema cannot be loaded.
    """
    args = argv[1:]
    schema_name = _DEFAULT_SCHEMA
    jsonl = False
    files: list[str] = []
    i = 0
    while i < len(args):
        a = args[i]
        if a == "--schema":
            # The schema name is the next argument; consume it here.
            i += 1
            if i >= len(args):
                print("ERROR: --schema requires a value", file=sys.stderr)
                return 2
            schema_name = args[i]
        elif a == "--jsonl":
            jsonl = True
        elif a.startswith("--schema="):
            schema_name = a[len("--schema="):]
        elif a.startswith("-"):
            print(f"ERROR: unknown flag {a!r}", file=sys.stderr)
            return 2
        else:
            files.append(a)
        i += 1

    if not files:
        print(
            "usage: autoplay-validate.py [--schema NAME] [--jsonl] <file> [<file> ...]",
            file=sys.stderr,
        )
        print(
            f" schemas: {', '.join(sorted(SCHEMA_PATHS))}",
            file=sys.stderr,
        )
        return 2

    try:
        schema = load_schema(schema_name)
    except (ValueError, OSError) as e:
        # ValueError: unknown schema name or malformed schema JSON.
        # OSError: the schema file is missing or unreadable. Report it
        # instead of letting a traceback escape.
        print(f"ERROR: {e}", file=sys.stderr)
        return 2

    total_errors = sum(_validate_file(Path(f), schema, jsonl) for f in files)
    return 1 if total_errors else 0
# Run the CLI only when executed as a script; the return value of _main
# becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(_main(sys.argv))