feat(@projects/@magic-civilization): add batch status monitoring tools

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
Natalie 2026-04-18 09:17:11 -07:00
parent b338e451c8
commit e71cd3ca92
3 changed files with 213 additions and 0 deletions

74
tools/apricot-batch-status.sh Executable file
View file

@ -0,0 +1,74 @@
#!/usr/bin/env bash
# apricot-batch-status.sh — One-line check on all currently-running autoplay
# batches on apricot, plus per-mode outcome tallies for recent batch dirs.
#
# Usage:
#   tools/apricot-batch-status.sh            # latest 3 batches + live procs
#   tools/apricot-batch-status.sh --latest   # just the newest batch dir
#   tools/apricot-batch-status.sh <stamp>    # a specific stamp
#
# Output (per batch dir, one line per mode subdir that contains game_* entries):
#   <label>: N games | victories=V, in_progress=P, other=O
# where "in_progress" counts games whose last stats line reports an outcome of
# max_turns or in_progress, and "other" is everything else (including games
# that have no turn_stats.jsonl yet).
#
# Intended replacement for the ad-hoc `ssh apricot "for d in ...; do ..."`
# one-liners the coordinator keeps re-deriving.
set -euo pipefail

MODE="${1:-tail}"
STAMP=""
case "$MODE" in
  --latest) DEPTH=1 ;;
  tail)     DEPTH=3 ;;
  *)        DEPTH=0; STAMP="$MODE" ;;
esac

# QUERY is the script that runs on apricot via `bash -s`. The quoted here-doc
# delimiter keeps every expansion literal until it executes remotely.
# `read -d ''` returns non-zero when it hits end of input, hence `|| true`.
read -r -d '' QUERY <<'EOF' || true
set -e
: "${DEPTH:=3}"
: "${STAMP:=}"
ROOT="$HOME/.cache/mc-batches"
[ -d "$ROOT" ] || { echo "no $ROOT"; exit 0; }

echo "=== apricot $(date +'%Y-%m-%d %H:%M:%S') ==="
echo "--- live processes ---"
echo " godot: $(pgrep -f 'godot-bin' 2>/dev/null | wc -l)"
echo " autoplay-batch: $(pgrep -f 'autoplay-batch' 2>/dev/null | wc -l)"
echo " cargo: $(pgrep -f 'cargo build' 2>/dev/null | wc -l)"
echo

# Collect the batch dirs to report on. An array (rather than a whitespace-split
# string) keeps directory names containing spaces intact.
targets=()
if [ -n "$STAMP" ]; then
  targets=("$ROOT/$STAMP")
else
  # Newest-first by mtime; ls is used only for that ordering.
  while IFS= read -r dir; do
    targets+=("$dir")
  done < <(ls -1dt -- "$ROOT"/*/ 2>/dev/null | head -n "$DEPTH")
fi

for stamp_dir in "${targets[@]}"; do
  if [ ! -d "$stamp_dir" ]; then
    # Only reachable for an explicitly requested stamp; say so instead of
    # printing nothing at all.
    echo "no such batch dir: $stamp_dir" >&2
    continue
  fi
  echo "--- $(basename "$stamp_dir") ---"
  # For each mode subdir (smoke | gpu-* | clan-*) or the stamp dir itself:
  for batch in "$stamp_dir" "$stamp_dir"/smoke "$stamp_dir"/gpu-* "$stamp_dir"/clan-*; do
    [ -d "$batch" ] || continue
    compgen -G "$batch/game_*" > /dev/null || continue
    n=0 victory_n=0 progress_n=0
    for g in "$batch"/game_*; do
      n=$((n + 1))
      s="$g/turn_stats.jsonl"
      [ -s "$s" ] || continue
      last=$(tail -n 1 "$s" 2>/dev/null)
      # Pull the "outcome" field out of the last line; whitespace around the
      # colon is tolerated so both compact and spaced JSON are recognised.
      # `head` being last in the pipeline keeps a no-match from tripping set -e.
      outcome=$(printf '%s\n' "$last" \
        | grep -oE '"outcome"[[:space:]]*:[[:space:]]*"[^"]+"' \
        | head -n 1)
      case "$outcome" in
        *victory*)                 victory_n=$((victory_n + 1)) ;;
        *max_turns*|*in_progress*) progress_n=$((progress_n + 1)) ;;
      esac
    done
    if [ "$batch" = "$stamp_dir" ]; then
      label="(root)"
    else
      label=$(basename "$batch")
    fi
    echo " $label: $n games | victories=$victory_n, in_progress=$progress_n, other=$((n - victory_n - progress_n))"
  done
  echo
done
EOF

# Shell-quote the stamp before splicing it into the remote command line so a
# stamp containing quotes or spaces cannot break (or inject into) the command.
printf -v STAMP_Q '%q' "$STAMP"
ssh apricot "DEPTH=${DEPTH} STAMP=${STAMP_Q} bash -s" <<< "$QUERY"

56
tools/batch-outcomes.sh Executable file
View file

@ -0,0 +1,56 @@
#!/usr/bin/env bash
# batch-outcomes.sh — Summarize per-seed outcomes in a batch directory.
#
# Usage:
#   tools/batch-outcomes.sh <batch_dir>
#   tools/batch-outcomes.sh apricot:<remote_batch_dir>   # runs over ssh
#
# Output (one line per seed, tab-separated):
#   seedN  outcome  turn  p0_cities  p1_cities  wall_clock_sec
# A game dir without a non-empty turn_stats.jsonl is reported as NO-STATS; a
# last line that is not a JSON object is reported as PARSE-ERR.
#
# The batch_dir must contain one or more `game_*/turn_stats.jsonl` files.
# Works for smoke/clan single-mode batches. For gpu-walltime and other
# multi-mode batches, run once per mode subdir:
#   tools/batch-outcomes.sh apricot:~/.cache/mc-batches/20260418_080214/gpu-true
# A leading `~` in the path is expanded on the host that runs the query.
set -euo pipefail

TARGET="${1:?usage: tools/batch-outcomes.sh <batch_dir | apricot:/path>}"

# The query is identical local vs remote — just the execution host differs.
# Packing it as a here-doc keeps escaping sane. `read -d ''` returns non-zero
# at end of input, hence `|| true`.
read -r -d '' QUERY <<'EOF' || true
set -e
: "${DIR:?DIR must be set}"

# DIR arrives quoted, so the shell never tilde-expands it; do it by hand so
# the documented `apricot:~/...` form resolves against the executing host.
case "$DIR" in
  "~")   DIR="$HOME" ;;
  "~/"*) DIR="$HOME/${DIR:2}" ;;
esac

for d in "$DIR"/game_*; do
  [ -d "$d" ] || continue
  name=$(basename "$d")
  # Fall back to the directory name when it carries no seedN token; a failing
  # grep here would otherwise end the loop under `set -e` and silently drop
  # every remaining game.
  if [[ "$name" =~ seed[0-9]+ ]]; then
    seed="${BASH_REMATCH[0]}"
  else
    seed="$name"
  fi
  stats="$d/turn_stats.jsonl"
  [ -s "$stats" ] || { printf '%s\tNO-STATS\t-\t-\t-\t-\n' "$seed"; continue; }
  python3 - "$stats" "$seed" <<'PY'
import json, sys

path, seed = sys.argv[1], sys.argv[2]

# Parse the last non-blank line of the stats file. Anything that is not a
# JSON object is reported as PARSE-ERR instead of raising, so one bad file
# cannot abort the whole listing.
try:
    last = ""
    with open(path) as f:
        for line in f:
            if line.strip():
                last = line
    d = json.loads(last)
except Exception:
    d = None
if not isinstance(d, dict):
    print(f"{seed}\tPARSE-ERR\t-\t-\t-\t-")
    sys.exit(0)

outcome = d.get("outcome", "?")
turn = d.get("turn", "?")
wc = d.get("wall_clock_sec", "?")
ps = d.get("player_stats")
if not isinstance(ps, dict):
    ps = {}


def cities(player):
    """City count for one player key, or "-" when it is absent."""
    entry = ps.get(player)
    return entry.get("cities", "-") if isinstance(entry, dict) else "-"


p0c = cities("0")
p1c = cities("1")
wcs = f"{wc:.1f}" if isinstance(wc, (int, float)) else str(wc)
print(f"{seed}\t{outcome}\t{turn}\t{p0c}\t{p1c}\t{wcs}")
PY
done | sort -V
EOF

if [[ "$TARGET" == apricot:* ]]; then
  REMOTE_PATH="${TARGET#apricot:}"
  # Shell-quote the path so spaces or quotes in it survive the remote shell.
  printf -v REMOTE_PATH_Q '%q' "$REMOTE_PATH"
  ssh apricot "DIR=${REMOTE_PATH_Q} bash -s" <<< "$QUERY"
else
  DIR="$TARGET" bash -c "$QUERY"
fi

83
tools/batch-walltime.sh Executable file
View file

@ -0,0 +1,83 @@
#!/usr/bin/env bash
# batch-walltime.sh — Aggregate per-batch wall-clock statistics.
#
# Usage:
#   tools/batch-walltime.sh <batch_dir>
#   tools/batch-walltime.sh apricot:<remote_batch_dir>
#   tools/batch-walltime.sh apricot:<parent_dir>   # multi-mode → per-subdir
#
# Prints, per batch:
#   "mode: n=NNN total=XX.Xs avg=YY.Ys/game victories=N/N turns=MIN-MAX"
# If TARGET has no game_* children of its own but is the parent of gpu-*,
# clan-* or smoke subdirs, iterates all of them so you can do a single call on
# the gpu-walltime parent. Exits 2 when no batch is found under TARGET.
# A leading `~` in the path is expanded on the host that runs the query.
set -euo pipefail

TARGET="${1:?usage: tools/batch-walltime.sh <batch_dir | apricot:/path>}"

# The same query runs locally (bash -c) or on apricot (bash -s over ssh).
# `read -d ''` returns non-zero at end of input, hence `|| true`.
read -r -d '' QUERY <<'EOF' || true
set -e
: "${DIR:?DIR must be set}"

# DIR arrives quoted, so the shell never tilde-expands it; do it by hand so
# `apricot:~/...` resolves against the executing host's home directory.
case "$DIR" in
  "~")   DIR="$HOME" ;;
  "~/"*) DIR="$HOME/${DIR:2}" ;;
esac

# summarize DIR LABEL
#   Prints one summary line for the game_* entries under DIR, prefixed with
#   LABEL. Games whose stats file is missing, empty or unparsable are skipped.
summarize() {
  local dir="$1"
  local label="$2"
  python3 - "$dir" "$label" <<'PY'
import json, pathlib, sys

root = pathlib.Path(sys.argv[1])
label = sys.argv[2]
games = sorted(root.glob("game_*"))

n = 0
total_wc = 0.0
victories = 0
turns = []
for g in games:
    stats = g / "turn_stats.jsonl"
    if not stats.is_file() or stats.stat().st_size == 0:
        continue
    try:
        # Use the last non-blank line as the game's final record.
        last = None
        with open(stats) as f:
            for line in f:
                if line.strip():
                    last = line
        if not last:
            continue
        d = json.loads(last)
    except Exception:
        continue
    if not isinstance(d, dict):
        # Valid JSON but not an object: treat like any other unparsable file.
        continue
    n += 1
    wc = d.get("wall_clock_sec")
    if isinstance(wc, (int, float)):
        total_wc += float(wc)
    if d.get("outcome") == "victory":
        victories += 1
    t = d.get("turn")
    if isinstance(t, int):
        turns.append(t)

if n == 0:
    print(f"{label}: no games")
    sys.exit(0)

avg = total_wc / n
tmin = min(turns) if turns else "?"
tmax = max(turns) if turns else "?"
print(f"{label}: n={n} total={total_wc:.1f}s avg={avg:.1f}s/game victories={victories}/{n} turns={tmin}-{tmax}")
PY
}

# Is DIR itself a batch dir (has game_* children) or a parent of mode-subdirs?
if compgen -G "$DIR/game_*" > /dev/null; then
  summarize "$DIR" "$(basename "$DIR")"
else
  # Iterate mode-subdirs
  found=0
  for sub in "$DIR"/gpu-* "$DIR"/clan-* "$DIR"/smoke; do
    [ -d "$sub" ] || continue
    compgen -G "$sub/game_*" > /dev/null || continue
    summarize "$sub" "$(basename "$sub")"
    found=1
  done
  # An explicit `if` (not `[ ... ] && { ... }`) so that the success path does
  # not leave a non-zero status behind as the script's exit code.
  if [ "$found" -eq 0 ]; then
    echo "no batches found under $DIR" >&2
    exit 2
  fi
fi
EOF

if [[ "$TARGET" == apricot:* ]]; then
  REMOTE_PATH="${TARGET#apricot:}"
  # Shell-quote the path so spaces or quotes in it survive the remote shell.
  printf -v REMOTE_PATH_Q '%q' "$REMOTE_PATH"
  ssh apricot "DIR=${REMOTE_PATH_Q} bash -s" <<< "$QUERY"
else
  DIR="$TARGET" bash -c "$QUERY"
fi