magicciv/scripts/claude-smoke-5endturn.sh
Natalie ad108810dd feat(@projects/@magic-civilization): add rl-self-play harness and Claude player integration
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-05-17 03:51:07 -07:00

108 lines
3.4 KiB
Bash
Executable file

#!/usr/bin/env bash
# p2-71 — 5-EndTurn smoke driver for the Claude Player API harness.
#
# Spawns player-api-server.sh, sends SMOKE_TURNS (default 5) act:end_turn
# requests followed by a shutdown request over stdin, parses the JSON-Lines
# responses, and prints a one-line verdict:
#
# {"turns_observed": N, "ai_turn_completed_events": N,
# "actions_applied_per_turn": [...], "passed": true|false, "reasons": [...]}
#
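# Passing requires at least SMOKE_TURNS observed turns, with
# `actions_applied > 0` on each of them for every AI slot that reports an
# `ai_turn_completed` event. Exits 0 on pass, 1 on fail.
#
# Env: CP_SEED (default 42), CP_PLAYERS (default 3), CP_PLAYER_SLOT (default 0),
# CP_MAP_SIZE (default duel), SMOKE_TURNS (default 5).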
# Deliberately no `-e`: the harness and the verdict parser are allowed to
# fail so their exit status can be captured and reported instead of aborting.
set -uo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Defaults for the harness configuration; any of these may be overridden from
# the caller's environment.
: "${CP_PLAYERS:=3}"
: "${CP_PLAYER_SLOT:=0}"
: "${CP_SEED:=42}"
: "${CP_MAP_SIZE:=duel}"
: "${SMOKE_TURNS:=5}"
# Wall-clock cap for the harness run, handed to timeout(1) as-is.
: "${SMOKE_TIMEOUT:=90}"
export CP_PLAYERS CP_PLAYER_SLOT CP_SEED CP_MAP_SIZE

# Reject anything that is not a plain decimal count before it reaches the
# arithmetic loop below (arithmetic context would otherwise evaluate it).
if [[ ! "$SMOKE_TURNS" =~ ^[0-9]+$ ]]; then
  printf 'SMOKE_TURNS must be a non-negative integer, got: %s\n' "$SMOKE_TURNS" >&2
  exit 1
fi

# Scratch directory for the request stream, responses and harness stderr.
TMP=$(mktemp -d -t mc-smoke-XXXXXX) || {
  printf 'mktemp failed; cannot create scratch directory\n' >&2
  exit 1
}
# Single quotes defer expansion until the trap fires, so a path containing
# quotes or spaces is still removed correctly.
trap 'rm -rf -- "$TMP"' EXIT

# Build the request stream — SMOKE_TURNS end_turn acts followed by shutdown.
# `10#` forces base 10 so a value such as "08" is not parsed as octal.
{
  for ((i = 1; i <= 10#$SMOKE_TURNS; i++)); do
    printf '{"type":"act","id":%d,"action":{"type":"end_turn"}}\n' "$i"
  done
  printf '{"type":"shutdown","id":999}\n'
} > "$TMP/in.jsonl"

# Run the harness under a timeout. A non-zero status does not abort the
# script (the verdict parser decides pass/fail from the captured output), but
# it is recorded in err.log so a timeout or crash is visible in diagnostics.
harness_rc=0
timeout "$SMOKE_TIMEOUT" "$SCRIPT_DIR/player-api-server.sh" \
  < "$TMP/in.jsonl" > "$TMP/out.jsonl" 2> "$TMP/err.log" || harness_rc=$?
if ((harness_rc == 124)); then
  printf 'smoke: harness timed out after %s (timeout exit status 124)\n' \
    "$SMOKE_TIMEOUT" >> "$TMP/err.log"
elif ((harness_rc != 0)); then
  printf 'smoke: harness exited with status %d\n' "$harness_rc" >> "$TMP/err.log"
fi
# Parse the harness output and print a one-line JSON verdict.
#
# An "ok" envelope carrying at least one `ai_turn_completed` event counts as
# one observed turn; `actions_applied` is summed per player slot within that
# envelope. Malformed lines and events are skipped rather than crashing, so a
# verdict line is always printed.
#
# Arguments: $1 - path to the JSON-Lines response file
#            $2 - number of turns that must be observed
# Outputs:   one JSON object on stdout
# Returns:   0 when the verdict passes, 1 otherwise
smoke_verdict() {
  python3 - "$1" "$2" <<'PY'
import json
import sys

TARGET_EVENT = "ai_turn_completed"


def as_count(value):
    """Coerce an actions_applied value to int; malformed values count as 0."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return 0


def collect(path):
    """Scan the response file.

    Returns (per_turn, total_ai_events, skipped) where per_turn is a list of
    {slot: summed actions_applied} dicts, one per envelope that contained an
    ai_turn_completed event, and skipped counts ok envelopes that carried
    events but none of that type.
    """
    per_turn = []
    total_ai_events = 0
    skipped = 0
    with open(path, encoding="utf-8", errors="replace") as handle:
        for raw in handle:
            raw = raw.strip()
            if not raw:
                continue
            try:
                envelope = json.loads(raw)
            except (ValueError, RecursionError):
                continue  # non-JSON noise on stdout
            if not isinstance(envelope, dict) or envelope.get("ok") is not True:
                continue
            events = envelope.get("events") or []
            if not isinstance(events, list) or not events:
                continue
            # Only envelopes with events are treated as act responses.
            slot_actions = {}
            for ev in events:
                if not isinstance(ev, dict):
                    continue
                if (ev.get("event") or ev.get("type")) != TARGET_EVENT:
                    continue
                total_ai_events += 1
                slot = ev.get("player")
                if isinstance(slot, (list, dict)):
                    # Unhashable JSON values cannot be dict keys.
                    slot = json.dumps(slot, sort_keys=True)
                slot_actions[slot] = slot_actions.get(slot, 0) + as_count(
                    ev.get("actions_applied", 0)
                )
            if slot_actions:
                per_turn.append(slot_actions)
            else:
                skipped += 1
    return per_turn, total_ai_events, skipped


def main(argv):
    out_path = argv[1]
    reasons = []

    try:
        n_turns = int(argv[2])
    except ValueError:
        n_turns = 0
        reasons.append(f"invalid turn count: {argv[2]!r}")

    try:
        per_turn, total_ai_events, skipped = collect(out_path)
    except OSError as exc:
        per_turn, total_ai_events, skipped = [], 0, 0
        reasons.append(f"cannot read {out_path}: {exc}")

    # Verdict: enough turns must be observed, and every slot that reported
    # ai_turn_completed in a turn must have actions_applied > 0.
    # NOTE(review): a slot that emits no ai_turn_completed event at all in a
    # turn is not detected here — confirm whether the expected slot count
    # should be enforced.
    if len(per_turn) < n_turns:
        reasons.append(f"only {len(per_turn)} turn responses out of {n_turns}")
        if skipped:
            reasons.append(
                f"{skipped} ok response(s) carried events but no {TARGET_EVENT}"
            )
    for turn_no, slot_actions in enumerate(per_turn, 1):
        for slot, applied in slot_actions.items():
            if applied <= 0:
                reasons.append(
                    f"turn {turn_no} slot {slot}: actions_applied={applied}"
                )

    passed = not reasons
    print(json.dumps({
        "turns_observed": len(per_turn),
        "ai_turn_completed_events": total_ai_events,
        "actions_applied_per_turn": per_turn,
        "passed": passed,
        "reasons": reasons,
    }))
    return 0 if passed else 1


sys.exit(main(sys.argv))
PY
}
smoke_verdict "$TMP/out.jsonl" "$SMOKE_TURNS"
# Capture the verdict parser's status; it becomes this script's exit status.
RC=$?
# On failure, surface the last 40 lines of harness stderr to aid debugging.
# A missing or unreadable log must not change the exit status.
if (( RC != 0 )); then
  {
    printf '%s\n' "--- stderr ---"
    tail -n 40 "$TMP/err.log" || true
  } >&2
fi
exit "$RC"