#!/usr/bin/env python3 """ JSON Schema validator for autoplay output files. Implements the subset of draft-07 used by the schemas: type, required, additionalProperties, properties, propertyNames.pattern, minimum, enum, items, pattern, $ref (local only). stdlib only — no pip installs. Usage: # Validate a single JSON file against a named schema: python3 tools/autoplay-validate.py --schema meta path/to/meta.json # Validate every line of a JSONL file independently: python3 tools/autoplay-validate.py --schema turn-stats-line --jsonl path/to/turn_stats.jsonl # Legacy: validate against the flat result schema (default): python3 tools/autoplay-validate.py path/to/result.json Exits 0 if all valid, 1 with errors to stderr, 2 on usage error. Available schema names (--schema): turn-stats-line tools/schemas/autoplay/turn-stats-line.json meta tools/schemas/autoplay/meta.json events-line tools/schemas/autoplay/events-line.json save tools/schemas/autoplay/save.json result tools/autoplay-result-schema.json (legacy flat schema) """ from __future__ import annotations import json import re import sys from pathlib import Path from typing import Any TOOLS_DIR = Path(__file__).parent SCHEMAS_DIR = TOOLS_DIR / "schemas" / "autoplay" SCHEMA_PATHS: dict[str, Path] = { "result": TOOLS_DIR / "autoplay-result-schema.json", "turn-stats-line": SCHEMAS_DIR / "turn-stats-line.json", "meta": SCHEMAS_DIR / "meta.json", "events-line": SCHEMAS_DIR / "events-line.json", "save": SCHEMAS_DIR / "save.json", } _DEFAULT_SCHEMA = "result" def load_schema(name: str = _DEFAULT_SCHEMA) -> dict[str, Any]: path = SCHEMA_PATHS.get(name) if path is None: raise ValueError( f"unknown schema {name!r}. Available: {', '.join(sorted(SCHEMA_PATHS))}" ) with path.open() as f: return json.load(f) _TYPE_CHECKS: dict[str, type | tuple[type, ...]] = { "object": dict, "array": list, "string": str, "integer": int, "number": (int, float), "boolean": bool, "null": type(None), } def _resolve_ref(ref: str, root: dict[str, Any]) -> dict[str, Any]: if not ref.startswith("#/"): raise ValueError(f"only local refs supported, got {ref!r}") node: Any = root for part in ref[2:].split("/"): if not isinstance(node, dict) or part not in node: raise ValueError(f"ref {ref!r} does not resolve") node = node[part] return node def _validate( value: Any, schema: dict[str, Any], root: dict[str, Any], path: str ) -> list[str]: errors: list[str] = [] if "$ref" in schema: schema = _resolve_ref(schema["$ref"], root) t = schema.get("type") if t is not None: expected = _TYPE_CHECKS.get(t) if expected is None: errors.append(f"{path}: unknown schema type {t!r}") return errors # bool is a subclass of int in Python; reject booleans as numbers. if t in ("integer", "number") and isinstance(value, bool): errors.append(f"{path}: expected {t}, got boolean") return errors if t == "integer" and isinstance(value, float) and not value.is_integer(): errors.append(f"{path}: expected integer, got float {value}") return errors if not isinstance(value, expected): errors.append(f"{path}: expected {t}, got {type(value).__name__}") return errors if "enum" in schema: if value not in schema["enum"]: errors.append(f"{path}: {value!r} not in enum {schema['enum']}") if "minimum" in schema and isinstance(value, (int, float)): if value < schema["minimum"]: errors.append(f"{path}: {value} < minimum {schema['minimum']}") if "maximum" in schema and isinstance(value, (int, float)): if value > schema["maximum"]: errors.append(f"{path}: {value} > maximum {schema['maximum']}") if "pattern" in schema and isinstance(value, str): if not re.match(schema["pattern"], value): errors.append( f"{path}: {value!r} does not match pattern {schema['pattern']!r}" ) if t == "object" and isinstance(value, dict): props: dict[str, Any] = schema.get("properties", {}) required: list[str] = schema.get("required", []) additional: bool | dict[str, Any] = schema.get("additionalProperties", True) prop_names: dict[str, Any] | None = schema.get("propertyNames") for req in required: if req not in value: errors.append(f"{path}: missing required property {req!r}") for k, v in value.items(): kpath = f"{path}.{k}" if prop_names is not None: errors.extend(_validate(k, prop_names, root, f"{kpath}")) if k in props: errors.extend(_validate(v, props[k], root, kpath)) elif additional is False: errors.append(f"{path}: unexpected property {k!r}") elif isinstance(additional, dict): errors.extend(_validate(v, additional, root, kpath)) if t == "array" and isinstance(value, list): item_schema = schema.get("items") if item_schema is not None: for i, item in enumerate(value): errors.extend(_validate(item, item_schema, root, f"{path}[{i}]")) return errors def validate(data: Any, schema: dict[str, Any] | None = None) -> list[str]: """Validate data against schema. Returns list of error strings (empty = valid).""" s = schema if schema is not None else load_schema() return _validate(data, s, s, "$") def _validate_file(path: Path, schema: dict[str, Any], jsonl: bool) -> int: """Validate one file. Returns error count.""" total_errors = 0 try: text = path.read_text() except OSError as e: print(f"{path}: cannot read ({e})", file=sys.stderr) return 1 if jsonl: for lineno, raw in enumerate(text.splitlines(), start=1): raw = raw.strip() if not raw: continue try: data = json.loads(raw) except json.JSONDecodeError as e: print(f"{path}:{lineno}: invalid JSON ({e})", file=sys.stderr) total_errors += 1 continue errs = validate(data, schema) if errs: total_errors += len(errs) print(f"{path}:{lineno}: {len(errs)} error(s)", file=sys.stderr) for e in errs: print(f" {e}", file=sys.stderr) else: try: data = json.loads(text) except json.JSONDecodeError as e: print(f"{path}: invalid JSON ({e})", file=sys.stderr) return 1 errs = validate(data, schema) if errs: total_errors += len(errs) print(f"{path}: {len(errs)} error(s)", file=sys.stderr) for e in errs: print(f" {e}", file=sys.stderr) else: print(f"{path}: OK", file=sys.stderr) if jsonl and total_errors == 0: print(f"{path}: OK", file=sys.stderr) return total_errors def _main(argv: list[str]) -> int: args = argv[1:] schema_name = _DEFAULT_SCHEMA jsonl = False files: list[str] = [] i = 0 while i < len(args): a = args[i] if a == "--schema": i += 1 if i >= len(args): print("ERROR: --schema requires a value", file=sys.stderr) return 2 schema_name = args[i] elif a == "--jsonl": jsonl = True elif a.startswith("--schema="): schema_name = a[len("--schema="):] elif a.startswith("-"): print(f"ERROR: unknown flag {a!r}", file=sys.stderr) return 2 else: files.append(a) i += 1 if not files: print( "usage: autoplay-validate.py [--schema NAME] [--jsonl] [ ...]", file=sys.stderr, ) print( f" schemas: {', '.join(sorted(SCHEMA_PATHS))}", file=sys.stderr, ) return 2 try: schema = load_schema(schema_name) except ValueError as e: print(f"ERROR: {e}", file=sys.stderr) return 2 total_errors = 0 for f in files: total_errors += _validate_file(Path(f), schema, jsonl) return 1 if total_errors else 0 if __name__ == "__main__": sys.exit(_main(sys.argv))