fix(scripts): 🐛 Fix false positives in container liveness checks and batch status reporting logic
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
619dafbdcc
commit
2d7357550e
1 changed file with 28 additions and 21 deletions
|
|
@ -334,25 +334,32 @@ if [[ "${MODE}" == "status" ]]; then
|
|||
|
||||
# Single ssh probe with short ConnectTimeout. Four lightweight queries:
|
||||
# 1. systemctl --user is-active <unit> (active|inactive|failed|unknown)
|
||||
# `|| true` because is-active exits non-zero on inactive/failed/unknown
|
||||
# units while still printing the state on stdout, and we want that
|
||||
# stdout string, not the exit code. The previous `|| echo unknown`
|
||||
# therefore appended a second line ("inactive" + "unknown"), which broke
|
||||
# the single-line `|`-delimited printf contract: `read -r` consumed only
|
||||
# the first line and left every other field empty →
|
||||
# false-positive `failed` after a clean batch.
|
||||
# 2. count of completion.marker files under <stamp>/*/
|
||||
# 3. count of turn_stats.jsonl files under <stamp>/*/game_*/
|
||||
# 4. count of live godot processes for THIS batch stamp
|
||||
# 4. container liveness for THIS batch stamp.
|
||||
# Pre-docker-wrapper era counted live godot procs via pgrep, but
|
||||
# (a) the launch stamp is never in the godot cmdline (the
|
||||
# autoplay-batch.sh runtime stamp is, and the two are distinct), and
|
||||
# (b) `pgrep -af "godot.*${STAMP}" | grep "godot --path"` self-matched
|
||||
# the ssh probe itself (which contains both substrings as literals).
|
||||
# Under the docker wrapper the launcher.sh blocks on `docker run --rm`
|
||||
# and the trap docker-kills the container on exit, so container
|
||||
# existence is the correct liveness signal.
|
||||
# We also read seeds_total from the first submode dir if present.
|
||||
#
|
||||
# The godot-proc count is load-bearing: `flatpak run` detaches into a
|
||||
# systemd user scope, so autoplay-batch.sh's `wait` returns and
|
||||
# completion.marker is touched while the actual godot processes are still
|
||||
# running headless games. Without checking live procs, fetch would pull
|
||||
# mid-run turn_stats with outcome=in_progress and the consumer would
|
||||
# think the gate failed when in fact games hadn't finished yet.
|
||||
PROBE='set +e
|
||||
IS_ACTIVE=$(systemctl --user is-active '"${UNIT}"' 2>/dev/null || echo unknown)
|
||||
IS_ACTIVE=$(systemctl --user is-active '"${UNIT}"' 2>/dev/null || true)
|
||||
IS_ACTIVE=${IS_ACTIVE:-unknown}
|
||||
MARKER_COUNT=$(ls "$HOME/.cache/mc-batches/'"${QUERY_STAMP}"'"/*/completion.marker 2>/dev/null | wc -l | tr -d " ")
|
||||
STATS_COUNT=$(ls "$HOME/.cache/mc-batches/'"${QUERY_STAMP}"'"/*/game_*/turn_stats.jsonl 2>/dev/null | wc -l | tr -d " ")
|
||||
GODOT_PROCS=$(pgrep -af "godot.*'"${QUERY_STAMP}"'" 2>/dev/null | grep -c "godot --path" || echo 0)
|
||||
CONTAINER_LIVE=$(docker ps --filter "name=mc-batch-'"${QUERY_STAMP}"'-run" --quiet 2>/dev/null | wc -l | tr -d " ")
|
||||
SEEDS_TOTAL=$(cat "$HOME/.cache/mc-batches/'"${QUERY_STAMP}"'"/*/seeds_total 2>/dev/null | head -1)
|
||||
SEEDS_TOTAL=${SEEDS_TOTAL:-0}
|
||||
printf "%s|%s|%s|%s|%s\n" "$IS_ACTIVE" "$MARKER_COUNT" "$STATS_COUNT" "$SEEDS_TOTAL" "$GODOT_PROCS"'
|
||||
printf "%s|%s|%s|%s|%s\n" "$IS_ACTIVE" "$MARKER_COUNT" "$STATS_COUNT" "$SEEDS_TOTAL" "$CONTAINER_LIVE"'
|
||||
|
||||
PROBE_OUT="$(ssh -o ConnectTimeout=5 -o BatchMode=yes "${APRICOT}" "${PROBE}" 2>/dev/null)" || PROBE_OUT=""
|
||||
|
||||
|
|
@ -361,21 +368,21 @@ if [[ "${MODE}" == "status" ]]; then
|
|||
exit 0
|
||||
fi
|
||||
|
||||
IFS='|' read -r IS_ACTIVE MARKER_COUNT STATS_COUNT SEEDS_TOTAL GODOT_PROCS <<<"${PROBE_OUT}"
|
||||
IFS='|' read -r IS_ACTIVE MARKER_COUNT STATS_COUNT SEEDS_TOTAL CONTAINER_LIVE <<<"${PROBE_OUT}"
|
||||
MARKER_COUNT="${MARKER_COUNT:-0}"
|
||||
STATS_COUNT="${STATS_COUNT:-0}"
|
||||
SEEDS_TOTAL="${SEEDS_TOTAL:-0}"
|
||||
GODOT_PROCS="${GODOT_PROCS:-0}"
|
||||
CONTAINER_LIVE="${CONTAINER_LIVE:-0}"
|
||||
|
||||
if [[ "${MARKER_COUNT}" -gt 0 && "${GODOT_PROCS}" -eq 0 ]]; then
|
||||
if [[ "${MARKER_COUNT}" -gt 0 && "${CONTAINER_LIVE}" -eq 0 ]]; then
|
||||
STATE="complete"
|
||||
MARKER_BOOL="true"
|
||||
elif [[ "${MARKER_COUNT}" -gt 0 && "${GODOT_PROCS}" -gt 0 ]]; then
|
||||
# Batch script returned (touched completion.marker) but flatpak-
|
||||
# detached godot processes are still playing games. Status remains
|
||||
# `running` so `fetch` won't pull mid-run turn_stats snapshots.
|
||||
elif [[ "${CONTAINER_LIVE}" -gt 0 ]]; then
|
||||
# Container still running — batch is in-flight even if the launcher
|
||||
# has somehow touched a stale marker (it shouldn't; the touch is
|
||||
# post-batch, but be defensive).
|
||||
STATE="running"
|
||||
MARKER_BOOL="true"
|
||||
MARKER_BOOL=$([[ "${MARKER_COUNT}" -gt 0 ]] && echo "true" || echo "false")
|
||||
elif [[ "${IS_ACTIVE}" == "active" || "${IS_ACTIVE}" == "activating" ]]; then
|
||||
STATE="running"
|
||||
MARKER_BOOL="false"
|
||||
|
|
@ -384,8 +391,8 @@ if [[ "${MODE}" == "status" ]]; then
|
|||
MARKER_BOOL="false"
|
||||
fi
|
||||
|
||||
printf '{"stamp":"%s","state":"%s","seeds_done":%s,"seeds_total":%s,"completion_marker":%s,"godot_procs":%s}\n' \
|
||||
"${QUERY_STAMP}" "${STATE}" "${STATS_COUNT}" "${SEEDS_TOTAL}" "${MARKER_BOOL}" "${GODOT_PROCS}"
|
||||
printf '{"stamp":"%s","state":"%s","seeds_done":%s,"seeds_total":%s,"completion_marker":%s,"container_live":%s}\n' \
|
||||
"${QUERY_STAMP}" "${STATE}" "${STATS_COUNT}" "${SEEDS_TOTAL}" "${MARKER_BOOL}" "${CONTAINER_LIVE}"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue