258 lines
8.3 KiB
Python
Executable file
258 lines
8.3 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""
|
|
JSON Schema validator for autoplay output files.
|
|
|
|
Implements the subset of draft-07 used by the schemas:
|
|
type, required, additionalProperties, properties, propertyNames.pattern,
|
|
minimum, enum, items, pattern, $ref (local only).
|
|
|
|
stdlib only — no pip installs.
|
|
|
|
Usage:
|
|
# Validate a single JSON file against a named schema:
|
|
python3 tools/autoplay-validate.py --schema meta path/to/meta.json
|
|
|
|
# Validate every line of a JSONL file independently:
|
|
python3 tools/autoplay-validate.py --schema turn-stats-line --jsonl path/to/turn_stats.jsonl
|
|
|
|
# Legacy: validate against the flat result schema (default):
|
|
python3 tools/autoplay-validate.py path/to/result.json
|
|
|
|
Exits 0 if all valid, 1 with errors to stderr, 2 on usage error.
|
|
|
|
Available schema names (--schema):
|
|
turn-stats-line tools/schemas/autoplay/turn-stats-line.json
|
|
meta tools/schemas/autoplay/meta.json
|
|
events-line tools/schemas/autoplay/events-line.json
|
|
save tools/schemas/autoplay/save.json
|
|
result tools/autoplay-result-schema.json (legacy flat schema)
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
TOOLS_DIR = Path(__file__).parent
|
|
SCHEMAS_DIR = TOOLS_DIR / "schemas" / "autoplay"
|
|
|
|
SCHEMA_PATHS: dict[str, Path] = {
|
|
"result": TOOLS_DIR / "autoplay-result-schema.json",
|
|
"turn-stats-line": SCHEMAS_DIR / "turn-stats-line.json",
|
|
"meta": SCHEMAS_DIR / "meta.json",
|
|
"events-line": SCHEMAS_DIR / "events-line.json",
|
|
"save": SCHEMAS_DIR / "save.json",
|
|
}
|
|
|
|
_DEFAULT_SCHEMA = "result"
|
|
|
|
|
|
def load_schema(name: str = _DEFAULT_SCHEMA) -> dict[str, Any]:
    """Load and parse the JSON schema registered under *name*.

    Args:
        name: Key into ``SCHEMA_PATHS``; defaults to ``_DEFAULT_SCHEMA``.

    Returns:
        The parsed schema document.

    Raises:
        ValueError: If *name* is not a registered schema. A schema file that
            is not valid JSON raises ``json.JSONDecodeError``, which is a
            ``ValueError`` subclass.
        OSError: If the schema file cannot be opened.
    """
    path = SCHEMA_PATHS.get(name)
    if path is None:
        raise ValueError(
            f"unknown schema {name!r}. Available: {', '.join(sorted(SCHEMA_PATHS))}"
        )
    # JSON is UTF-8; do not depend on the platform's locale encoding.
    with path.open(encoding="utf-8") as f:
        return json.load(f)
|
|
|
|
|
|
_TYPE_CHECKS: dict[str, type | tuple[type, ...]] = {
|
|
"object": dict,
|
|
"array": list,
|
|
"string": str,
|
|
"integer": int,
|
|
"number": (int, float),
|
|
"boolean": bool,
|
|
"null": type(None),
|
|
}
|
|
|
|
|
|
def _resolve_ref(ref: str, root: dict[str, Any]) -> dict[str, Any]:
|
|
if not ref.startswith("#/"):
|
|
raise ValueError(f"only local refs supported, got {ref!r}")
|
|
node: Any = root
|
|
for part in ref[2:].split("/"):
|
|
if not isinstance(node, dict) or part not in node:
|
|
raise ValueError(f"ref {ref!r} does not resolve")
|
|
node = node[part]
|
|
return node
|
|
|
|
|
|
def _validate(
|
|
value: Any, schema: dict[str, Any], root: dict[str, Any], path: str
|
|
) -> list[str]:
|
|
errors: list[str] = []
|
|
|
|
if "$ref" in schema:
|
|
schema = _resolve_ref(schema["$ref"], root)
|
|
|
|
t = schema.get("type")
|
|
if t is not None:
|
|
expected = _TYPE_CHECKS.get(t)
|
|
if expected is None:
|
|
errors.append(f"{path}: unknown schema type {t!r}")
|
|
return errors
|
|
# bool is a subclass of int in Python; reject booleans as numbers.
|
|
if t in ("integer", "number") and isinstance(value, bool):
|
|
errors.append(f"{path}: expected {t}, got boolean")
|
|
return errors
|
|
if t == "integer" and isinstance(value, float) and not value.is_integer():
|
|
errors.append(f"{path}: expected integer, got float {value}")
|
|
return errors
|
|
if not isinstance(value, expected):
|
|
errors.append(f"{path}: expected {t}, got {type(value).__name__}")
|
|
return errors
|
|
|
|
if "enum" in schema:
|
|
if value not in schema["enum"]:
|
|
errors.append(f"{path}: {value!r} not in enum {schema['enum']}")
|
|
|
|
if "minimum" in schema and isinstance(value, (int, float)):
|
|
if value < schema["minimum"]:
|
|
errors.append(f"{path}: {value} < minimum {schema['minimum']}")
|
|
|
|
if "pattern" in schema and isinstance(value, str):
|
|
if not re.match(schema["pattern"], value):
|
|
errors.append(
|
|
f"{path}: {value!r} does not match pattern {schema['pattern']!r}"
|
|
)
|
|
|
|
if t == "object" and isinstance(value, dict):
|
|
props: dict[str, Any] = schema.get("properties", {})
|
|
required: list[str] = schema.get("required", [])
|
|
additional: bool | dict[str, Any] = schema.get("additionalProperties", True)
|
|
prop_names: dict[str, Any] | None = schema.get("propertyNames")
|
|
|
|
for req in required:
|
|
if req not in value:
|
|
errors.append(f"{path}: missing required property {req!r}")
|
|
|
|
for k, v in value.items():
|
|
kpath = f"{path}.{k}"
|
|
if prop_names is not None:
|
|
errors.extend(_validate(k, prop_names, root, f"{kpath}<key>"))
|
|
if k in props:
|
|
errors.extend(_validate(v, props[k], root, kpath))
|
|
elif additional is False:
|
|
errors.append(f"{path}: unexpected property {k!r}")
|
|
elif isinstance(additional, dict):
|
|
errors.extend(_validate(v, additional, root, kpath))
|
|
|
|
if t == "array" and isinstance(value, list):
|
|
item_schema = schema.get("items")
|
|
if item_schema is not None:
|
|
for i, item in enumerate(value):
|
|
errors.extend(_validate(item, item_schema, root, f"{path}[{i}]"))
|
|
|
|
return errors
|
|
|
|
|
|
def validate(data: Any, schema: dict[str, Any] | None = None) -> list[str]:
    """Check *data* against *schema* and report every violation found.

    When *schema* is ``None`` the default schema is obtained from
    ``load_schema()``. The schema also serves as the root document for
    ``$ref`` resolution, and error paths start at ``$``.

    Returns a list of error strings; an empty list means *data* is valid.
    """
    if schema is None:
        schema = load_schema()
    return _validate(data, schema, schema, "$")
|
|
|
|
|
|
def _validate_file(path: Path, schema: dict[str, Any], jsonl: bool) -> int:
|
|
"""Validate one file. Returns error count."""
|
|
total_errors = 0
|
|
try:
|
|
text = path.read_text()
|
|
except OSError as e:
|
|
print(f"{path}: cannot read ({e})", file=sys.stderr)
|
|
return 1
|
|
|
|
if jsonl:
|
|
for lineno, raw in enumerate(text.splitlines(), start=1):
|
|
raw = raw.strip()
|
|
if not raw:
|
|
continue
|
|
try:
|
|
data = json.loads(raw)
|
|
except json.JSONDecodeError as e:
|
|
print(f"{path}:{lineno}: invalid JSON ({e})", file=sys.stderr)
|
|
total_errors += 1
|
|
continue
|
|
errs = validate(data, schema)
|
|
if errs:
|
|
total_errors += len(errs)
|
|
print(f"{path}:{lineno}: {len(errs)} error(s)", file=sys.stderr)
|
|
for e in errs:
|
|
print(f" {e}", file=sys.stderr)
|
|
else:
|
|
try:
|
|
data = json.loads(text)
|
|
except json.JSONDecodeError as e:
|
|
print(f"{path}: invalid JSON ({e})", file=sys.stderr)
|
|
return 1
|
|
errs = validate(data, schema)
|
|
if errs:
|
|
total_errors += len(errs)
|
|
print(f"{path}: {len(errs)} error(s)", file=sys.stderr)
|
|
for e in errs:
|
|
print(f" {e}", file=sys.stderr)
|
|
else:
|
|
print(f"{path}: OK", file=sys.stderr)
|
|
|
|
if jsonl and total_errors == 0:
|
|
print(f"{path}: OK", file=sys.stderr)
|
|
|
|
return total_errors
|
|
|
|
|
|
def _main(argv: list[str]) -> int:
    """Command-line entry point.

    Recognised arguments: ``--schema NAME`` / ``--schema=NAME``, ``--jsonl``,
    and one or more file paths. Any other argument starting with ``-`` is
    rejected as an unknown flag.

    Args:
        argv: Full argument vector; index 0 (the program name) is ignored.

    Returns:
        Exit status: 0 if every file validated cleanly, 1 if any error was
        reported, 2 on a usage error or when the schema cannot be loaded.
    """
    args = argv[1:]

    schema_name = _DEFAULT_SCHEMA
    jsonl = False
    files: list[str] = []

    # Hand-rolled parsing keeps the tool's own messages and exit code 2.
    i = 0
    while i < len(args):
        a = args[i]
        if a == "--schema":
            # The schema name is the next argument; consume it here.
            i += 1
            if i >= len(args):
                print("ERROR: --schema requires a value", file=sys.stderr)
                return 2
            schema_name = args[i]
        elif a == "--jsonl":
            jsonl = True
        elif a.startswith("--schema="):
            schema_name = a[len("--schema="):]
        elif a.startswith("-"):
            print(f"ERROR: unknown flag {a!r}", file=sys.stderr)
            return 2
        else:
            files.append(a)
        i += 1

    if not files:
        print(
            "usage: autoplay-validate.py [--schema NAME] [--jsonl] <file> [<file> ...]",
            file=sys.stderr,
        )
        print(
            f" schemas: {', '.join(sorted(SCHEMA_PATHS))}",
            file=sys.stderr,
        )
        return 2

    try:
        schema = load_schema(schema_name)
    except ValueError as e:
        # Unknown schema name, or a schema file that is not valid JSON
        # (json.JSONDecodeError is a ValueError subclass).
        print(f"ERROR: {e}", file=sys.stderr)
        return 2
    except OSError as e:
        # A missing or unreadable schema file is reported the same way as a
        # corrupt one, instead of surfacing as a traceback.
        print(f"ERROR: cannot load schema {schema_name!r} ({e})", file=sys.stderr)
        return 2

    total_errors = 0
    for f in files:
        total_errors += _validate_file(Path(f), schema, jsonl)

    return 1 if total_errors else 0
|
|
|
|
|
|
# Run the CLI only when executed as a script; the value returned by _main
# becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(_main(sys.argv))
|