#!/usr/bin/env bash
# p2-71 — 5-EndTurn smoke driver for the Claude Player API harness.
#
# Spawns claude-player-server.sh, sends SMOKE_TURNS (default 5) act:end_turn
# requests followed by one shutdown request over stdin, parses the JSON-Lines
# responses, and prints a one-line JSON verdict:
#
#   {"turns_observed": N, "ai_turn_completed_events": N,
#    "actions_applied_per_turn": [...], "passed": true|false,
#    "reasons": [...]}
#
# Passing requires at least SMOKE_TURNS turn responses, and
# `actions_applied > 0` on every observed turn for every AI slot that reported
# an `ai_turn_completed` event.
# Exits 0 on pass, 1 on fail.
#
# Env: CP_SEED (default 42), CP_PLAYERS (default 3), CP_CLAUDE_SLOT (default 0),
#      CP_MAP_SIZE (default duel), SMOKE_TURNS (default 5).

# Deliberately no `-e`: later steps capture non-zero exit statuses themselves
# instead of aborting the script.
set -uo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck disable=SC2034 # not referenced in this script; kept as-is
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"

# Defaults; each one can be overridden from the environment.
: "${CP_PLAYERS:=3}"
: "${CP_CLAUDE_SLOT:=0}"
: "${CP_SEED:=42}"
: "${CP_MAP_SIZE:=duel}"
: "${SMOKE_TURNS:=5}"

# The CP_* settings are exported so the spawned harness process inherits them.
export CP_PLAYERS CP_CLAUDE_SLOT CP_SEED CP_MAP_SIZE

# Scratch directory for the request stream, responses and harness stderr.
TMP=$(mktemp -d -t mc-smoke-XXXXXX) || {
  echo "smoke: mktemp failed" >&2
  exit 1
}
# Single-quoted so $TMP is expanded when the trap fires rather than being
# spliced into the command string now (which breaks on paths containing a
# single quote). `:?` refuses to run `rm -rf` on an empty value.
trap 'rm -rf -- "${TMP:?}"' EXIT

# Build the request stream — SMOKE_TURNS end_turn acts followed by shutdown.
if ! [[ "$SMOKE_TURNS" =~ ^[0-9]+$ ]]; then
  echo "smoke: SMOKE_TURNS must be a non-negative integer, got: $SMOKE_TURNS" >&2
  exit 1
fi
# Force base 10 so a value such as "08" is not parsed as (invalid) octal.
turns=$((10#$SMOKE_TURNS))
{
  for ((i = 1; i <= turns; i++)); do
    printf '{"type":"act","id":%d,"action":{"action":"end_turn"}}\n' "$i"
  done
  printf '{"type":"shutdown","id":999}\n'
} > "$TMP/in.jsonl"

# Run the harness under a 90s hard timeout. A non-zero exit is not fatal at
# this point: the verdict is derived from whatever responses reached
# out.jsonl. The exit status is appended to err.log so that it appears in the
# stderr tail printed when the smoke run fails.
harness_rc=0
timeout 90 "$SCRIPT_DIR/claude-player-server.sh" \
  < "$TMP/in.jsonl" > "$TMP/out.jsonl" 2> "$TMP/err.log" || harness_rc=$?
if ((harness_rc == 124)); then
  # 124 is the status `timeout` itself returns when the time limit is hit.
  echo "smoke: harness timed out after 90s" >> "$TMP/err.log"
elif ((harness_rc != 0)); then
  echo "smoke: harness exited with status $harness_rc" >> "$TMP/err.log"
fi

# Parse — for each turn response, count `ai_turn_completed` events and sum
# actions_applied per slot. Prints exactly one JSON verdict line on stdout and
# exits 0 (pass) or 1 (fail).
python3 - "$TMP/out.jsonl" "$SMOKE_TURNS" <<'PY'
import json
import sys

out_path = sys.argv[1]
n_turns = int(sys.argv[2])

per_turn = []          # one {slot: summed actions_applied} dict per turn response
total_ai_events = 0
skipped_responses = 0  # ok-responses with events but no ai_turn_completed
reasons = []           # non-empty <=> the smoke run failed

# errors="replace": stray non-UTF-8 bytes must not abort the parse; such a
# line simply fails JSON decoding (or matching) and is skipped.
with open(out_path, encoding="utf-8", errors="replace") as f:
    for raw in f:
        text = raw.strip()
        if not text:
            continue
        try:
            obj = json.loads(text)
        except (ValueError, RecursionError):
            # Not JSON (e.g. log noise on stdout) — ignore the line.
            continue
        if not isinstance(obj, dict) or obj.get("ok") is not True:
            continue
        events = obj.get("events") or []
        # Only envelopes that carry a list of events are treated as turn
        # responses.
        if not isinstance(events, list) or not events:
            continue

        slot_actions = {}
        for ev in events:
            if not isinstance(ev, dict):
                continue
            ev_type = ev.get("event") or ev.get("type")
            if ev_type != "ai_turn_completed":
                continue
            total_ai_events += 1
            slot = ev.get("player")
            applied = ev.get("actions_applied", 0)
            try:
                applied = int(applied)
            except (TypeError, ValueError):
                # A null/non-numeric count is a failure, reported in the
                # verdict instead of crashing with a traceback.
                reasons.append(
                    f"turn {len(per_turn) + 1} slot {slot}: "
                    f"malformed actions_applied={applied!r}"
                )
                applied = 0
            slot_actions[slot] = slot_actions.get(slot, 0) + applied

        if slot_actions:
            per_turn.append(slot_actions)
        else:
            skipped_responses += 1

# Verdict: at least n_turns turn responses, and every slot that reported an
# ai_turn_completed event on a turn must have actions_applied > 0 on that turn.
if len(per_turn) < n_turns:
    reasons.append(f"only {len(per_turn)} turn responses out of {n_turns}")
    if skipped_responses:
        reasons.append(
            f"{skipped_responses} response(s) carried events "
            "but no ai_turn_completed"
        )
for i, slot_actions in enumerate(per_turn, 1):
    for slot, applied in slot_actions.items():
        if applied <= 0:
            reasons.append(f"turn {i} slot {slot}: actions_applied={applied}")

passed = not reasons

print(json.dumps({
    "turns_observed": len(per_turn),
    "ai_turn_completed_events": total_ai_events,
    "actions_applied_per_turn": per_turn,
    "passed": passed,
    "reasons": reasons,
}))
sys.exit(0 if passed else 1)
PY
RC=$?

# On failure, surface the tail of the harness stderr to help diagnosis.
if [[ $RC -ne 0 ]]; then
  echo "--- stderr ---" >&2
  tail -n 40 "$TMP/err.log" >&2 || true
fi
exit "$RC"