feat(@projects/@magic-civilization): ✨ add batch status monitoring tools
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
b338e451c8
commit
e71cd3ca92
3 changed files with 213 additions and 0 deletions
74
tools/apricot-batch-status.sh
Executable file
74
tools/apricot-batch-status.sh
Executable file
|
|
@ -0,0 +1,74 @@
|
|||
#!/usr/bin/env bash
# apricot-batch-status.sh — One-shot status check for autoplay batches on
# apricot: live process counts plus per-batch outcome tallies for the most
# recent batch directories.
#
# Usage:
#   tools/apricot-batch-status.sh            # latest 3 batches + live procs
#   tools/apricot-batch-status.sh --latest   # just the newest batch dir
#   tools/apricot-batch-status.sh <stamp>    # a specific stamp
#
# Intended replacement for the ad-hoc `ssh apricot "for d in ...; do ..."`
# one-liners the coordinator keeps re-deriving.

set -euo pipefail

MODE="${1:-tail}"
STAMP=""

case "$MODE" in
  --latest) DEPTH=1 ;;
  tail) DEPTH=3 ;;
  -*)
    # Reject unknown flags instead of silently treating them as a stamp.
    printf 'usage: %s [--latest | <stamp>]\n' "${0##*/}" >&2
    exit 2
    ;;
  *)
    DEPTH=0
    STAMP="$MODE"
    ;;
esac

# The remote half of the tool. It is shipped verbatim over ssh stdin (quoted
# here-doc, so nothing below is expanded locally) and receives DEPTH / STAMP
# through the environment.
read -r -d '' QUERY <<'EOF' || true
set -e
: "${DEPTH:=3}"
: "${STAMP:=}"

ROOT="$HOME/.cache/mc-batches"
[ -d "$ROOT" ] || { echo "no $ROOT"; exit 0; }

echo "=== apricot $(date +'%Y-%m-%d %H:%M:%S') ==="
echo "--- live processes ---"
echo " godot: $(pgrep -f 'godot-bin' 2>/dev/null | wc -l)"
echo " autoplay-batch: $(pgrep -f 'autoplay-batch' 2>/dev/null | wc -l)"
echo " cargo: $(pgrep -f 'cargo build' 2>/dev/null | wc -l)"
echo

# Collect the stamp directories to report on, one array element per path so
# that nothing is re-split on whitespace later.
targets=()
if [ -n "$STAMP" ]; then
  if [ ! -d "$ROOT/$STAMP" ]; then
    echo "no such batch: $ROOT/$STAMP" >&2
    exit 1
  fi
  targets+=("$ROOT/$STAMP")
else
  # Newest-first by mtime; ls is used only for the time ordering.
  while IFS= read -r newest; do
    targets+=("$newest")
  done < <(ls -1dt "$ROOT"/*/ 2>/dev/null | head -n "$DEPTH")
fi

for stamp_dir in "${targets[@]}"; do
  [ -d "$stamp_dir" ] || continue
  echo "--- $(basename "$stamp_dir") ---"
  # For each mode subdir (smoke | gpu-* | clan-*) or the stamp dir itself:
  for batch in "$stamp_dir" "$stamp_dir"/smoke "$stamp_dir"/gpu-* "$stamp_dir"/clan-*; do
    [ -d "$batch" ] || continue
    compgen -G "$batch/game_*" > /dev/null || continue
    games=("$batch"/game_*)
    n=${#games[@]}
    victory_n=0
    progress_n=0
    for g in "${games[@]}"; do
      s="$g/turn_stats.jsonl"
      [ -s "$s" ] || continue
      last=$(tail -n 1 "$s" 2>/dev/null) || continue
      # Tolerate optional whitespace around the colon so both compact and
      # pretty-printed JSON writers are recognised.
      outcome=$(printf '%s\n' "$last" \
        | grep -oE '"outcome"[[:space:]]*:[[:space:]]*"[^"]+"' \
        | head -n 1)
      case "$outcome" in
        *victory*) victory_n=$((victory_n + 1)) ;;
        *max_turns* | *in_progress*) progress_n=$((progress_n + 1)) ;;
      esac
    done
    if [ "$batch" = "$stamp_dir" ]; then
      label="(root)"
    else
      label=$(basename "$batch")
    fi
    # "other" is everything that is neither a victory nor in progress,
    # including games that have no stats file yet.
    echo " $label: $n games | victories=$victory_n, in_progress=$progress_n, other=$((n - victory_n - progress_n))"
  done
  echo
done
EOF

# printf %q shell-quotes STAMP so an odd stamp cannot break out of the
# remote command line.
ssh apricot "DEPTH=${DEPTH} STAMP=$(printf '%q' "$STAMP") bash -s" <<< "$QUERY"
|
||||
56
tools/batch-outcomes.sh
Executable file
56
tools/batch-outcomes.sh
Executable file
|
|
@ -0,0 +1,56 @@
|
|||
#!/usr/bin/env bash
# batch-outcomes.sh — Summarize per-seed outcomes in a batch directory.
#
# Usage:
#   tools/batch-outcomes.sh <batch_dir>
#   tools/batch-outcomes.sh apricot:<remote_batch_dir>   # runs over ssh
#
# Output (one line per seed, tab-separated):
#   seedN  outcome  turn  p0_cities  p1_cities  wall_clock_sec
#
# The batch_dir must contain one or more `game_*/turn_stats.jsonl` files.
# Works for smoke/clan single-mode batches. For gpu-walltime and other
# multi-mode batches, run once per mode subdir:
#   tools/batch-outcomes.sh apricot:~/.cache/mc-batches/20260418_080214/gpu-true
#
# A leading `~` or `~/` in the directory is resolved against $HOME on the
# host where the query runs, so the example above works as written.

set -euo pipefail

TARGET="${1:?usage: tools/batch-outcomes.sh <batch_dir | apricot:/path>}"

# The query is identical local vs remote — just the execution host differs.
# Packing it as a here-doc keeps escaping sane.
read -r -d '' QUERY <<'EOF' || true
set -e
: "${DIR:?DIR must be set}"

# DIR arrives as a plain string, so the shell never tilde-expands it;
# resolve a leading ~ by hand.
case "$DIR" in
  "~") DIR="$HOME" ;;
  "~/"*) DIR="$HOME/${DIR#"~/"}" ;;
esac

[ -d "$DIR" ] || { echo "not a directory: $DIR" >&2; exit 2; }

for game_dir in "$DIR"/game_*; do
  [ -d "$game_dir" ] || continue
  name=$(basename "$game_dir")
  # Fall back to the directory name when it carries no seedN token, rather
  # than letting a failed grep abort the loop under `set -e`.
  seed=$(printf '%s\n' "$name" | grep -oE 'seed[0-9]+' | head -n 1)
  [ -n "$seed" ] || seed="$name"
  stats="$game_dir/turn_stats.jsonl"
  [ -s "$stats" ] || { printf '%s\tNO-STATS\t-\t-\t-\t-\n' "$seed"; continue; }
  last=$(tail -n 1 "$stats")
  python3 - "$last" "$seed" <<'PY'
import json, sys

line, seed = sys.argv[1], sys.argv[2]
try:
    d = json.loads(line)
except Exception:
    d = None
# Anything that is not a JSON object is reported as a parse error.
if not isinstance(d, dict):
    print(f"{seed}\tPARSE-ERR\t-\t-\t-\t-")
    sys.exit(0)

outcome = d.get("outcome", "?")
turn = d.get("turn", "?")
wc = d.get("wall_clock_sec", "?")
ps = d.get("player_stats")
if not isinstance(ps, dict):
    ps = {}


def cities(player_id):
    entry = ps.get(player_id)
    return entry.get("cities", "-") if isinstance(entry, dict) else "-"


p0c = cities("0")
p1c = cities("1")
wcs = f"{wc:.1f}" if isinstance(wc, (int, float)) else str(wc)
print(f"{seed}\t{outcome}\t{turn}\t{p0c}\t{p1c}\t{wcs}")
PY
done | sort -V
EOF

if [[ "$TARGET" == apricot:* ]]; then
  REMOTE_PATH="${TARGET#apricot:}"
  # printf %q shell-quotes the path for the remote command line; the query
  # itself resolves any leading ~.
  ssh apricot "DIR=$(printf '%q' "$REMOTE_PATH") bash -s" <<< "$QUERY"
else
  DIR="$TARGET" bash -c "$QUERY"
fi
|
||||
83
tools/batch-walltime.sh
Executable file
83
tools/batch-walltime.sh
Executable file
|
|
@ -0,0 +1,83 @@
|
|||
#!/usr/bin/env bash
# batch-walltime.sh — Aggregate per-batch wall-clock statistics.
#
# Usage:
#   tools/batch-walltime.sh <batch_dir>
#   tools/batch-walltime.sh apricot:<remote_batch_dir>
#   tools/batch-walltime.sh apricot:<parent_dir>   # multi-mode → per-subdir
#
# Prints, per batch:
#   "mode: n=NNN total=XX.Xs avg=YY.Ys/game victories=N/N turns=MIN-MAX"
# If TARGET has no game_* children of its own but is the parent of
# {gpu-*,clan-*,smoke} subdirs, iterates all of them so you can do a single
# call on the gpu-walltime parent.
#
# A leading `~` or `~/` in the directory is resolved against $HOME on the
# host where the query runs.

set -euo pipefail

TARGET="${1:?usage: tools/batch-walltime.sh <batch_dir | apricot:/path>}"

# The same query runs locally or on apricot; a quoted here-doc keeps it
# free of local expansion.
read -r -d '' QUERY <<'EOF' || true
set -e
: "${DIR:?DIR must be set}"

# DIR arrives as a plain string, so the shell never tilde-expands it;
# resolve a leading ~ by hand.
case "$DIR" in
  "~") DIR="$HOME" ;;
  "~/"*) DIR="$HOME/${DIR#"~/"}" ;;
esac

# summarize <dir> <label>
# Reads the last non-blank line of every game_*/turn_stats.jsonl under <dir>
# and prints one aggregate line prefixed with <label>.
summarize() {
  local dir="$1"
  local label="$2"
  python3 - "$dir" "$label" <<'PY'
import json, pathlib, sys

root = pathlib.Path(sys.argv[1])
label = sys.argv[2]

n = 0
total_wc = 0.0
victories = 0
turns = []

for g in sorted(root.glob("game_*")):
    stats = g / "turn_stats.jsonl"
    if not stats.is_file() or stats.stat().st_size == 0:
        continue
    try:
        last = None
        with open(stats) as f:
            for line in f:
                if line.strip():
                    last = line
        if not last:
            continue
        d = json.loads(last)
    except Exception:
        continue
    # Skip records that parse but are not JSON objects.
    if not isinstance(d, dict):
        continue
    n += 1
    wc = d.get("wall_clock_sec")
    if isinstance(wc, (int, float)):
        total_wc += float(wc)
    if d.get("outcome") == "victory":
        victories += 1
    t = d.get("turn")
    if isinstance(t, int):
        turns.append(t)

if n == 0:
    print(f"{label}: no games")
    sys.exit(0)

avg = total_wc / n
tmin = min(turns) if turns else "?"
tmax = max(turns) if turns else "?"
print(f"{label}: n={n} total={total_wc:.1f}s avg={avg:.1f}s/game victories={victories}/{n} turns={tmin}-{tmax}")
PY
}

# Is DIR itself a batch dir (has game_* children) or a parent of mode-subdirs?
if compgen -G "$DIR/game_*" > /dev/null; then
  summarize "$DIR" "$(basename "$DIR")"
else
  # Iterate mode-subdirs
  found=0
  for sub in "$DIR"/gpu-* "$DIR"/clan-* "$DIR"/smoke; do
    [ -d "$sub" ] || continue
    compgen -G "$sub/game_*" > /dev/null || continue
    summarize "$sub" "$(basename "$sub")"
    found=1
  done
  # Use a real `if`: a bare `[ ... ] && { ... }` as the final command would
  # make the script exit 1 whenever batches WERE found.
  if [ "$found" -eq 0 ]; then
    echo "no batches found under $DIR" >&2
    exit 2
  fi
fi
EOF

if [[ "$TARGET" == apricot:* ]]; then
  REMOTE_PATH="${TARGET#apricot:}"
  # printf %q shell-quotes the path for the remote command line; the query
  # itself resolves any leading ~.
  ssh apricot "DIR=$(printf '%q' "$REMOTE_PATH") bash -s" <<< "$QUERY"
else
  DIR="$TARGET" bash -c "$QUERY"
fi
|
||||
Loading…
Add table
Reference in a new issue