ryanbreen · ryanbreen · Jun 2, 2026 · Jun 2, 2026 · Jun 2, 2026 · Jun 2, 2026
diff --git a/.claude/workflows/parallels-launcher-test.js b/.claude/workflows/parallels-launcher-test.js
@@ -0,0 +1,102 @@
+export const meta = { // Exported workflow descriptor: identifier, human-readable summary, and phase list.
+  name: 'parallels-launcher-test',
+  description: 'Drive the Breenix launcher->terminal smoke test on a fresh Parallels VM, sequentially (one VM, never parallel), measuring the consecutive-green streak until 10-in-a-row or 15 attempts.',
+  phases: [
+    { title: 'Gate', detail: 'Run launcher-smoke.sh --no-build up to 15 times sequentially; stop early at a 10-consecutive-PASS streak.' }, // only phase; same title the script passes to phase('Gate')
+  ],
+};
+
+const MAX_ATTEMPTS = 15; // upper bound on attempt-loop iterations per workflow run
+const TARGET_STREAK = 10; // consecutive PASS count that ends the loop early and marks the gate green
+
+const ATTEMPT_SCHEMA = {
+  type: 'object',
+  additionalProperties: false,
+  properties: {
+    pass: { type: 'boolean', description: 'true ONLY if the script printed exactly "RESULT: PASS"' },
+    reason: { type: 'string', description: 'For a FAIL, the text after "RESULT: FAIL:"; for a PASS, "ok".' },
+    injectionMs: { type: 'integer', description: 'The double-tap injection wall-time in ms from the smoke log line "double-tap injection wall-time: <N>ms", or -1 if not found.' },
+    launcherOpened: { type: 'boolean', description: 'true if the serial/evidence shows the launcher opened this run.' },
+    evidencePath: { type: 'string', description: 'Absolute path to the run-* evidence dir (from result.txt evidence_dir=), or empty string.' },
+  },
+  required: ['pass', 'reason', 'evidencePath'],
+};
+
+const ATTEMPT_PROMPT = [ // Instruction text for ONE smoke-test attempt; one array entry per prompt line.
+  'Run the Breenix launcher->terminal smoke test ONCE and report the structured outcome.',
+  '',
+  'HOW TO RUN (mandatory):',
+  '- Use the Bash tool with dangerouslyDisableSandbox:true AND run_in_background:true.',
+  '- Command (note --no-build: artifacts already exist; a per-run rebuild is wrong and wasteful):',
+  '    bash /Users/wrb/fun/code/breenix/scripts/parallels/launcher-smoke.sh --no-build',
+  '- A single run takes ~6-10 min (fresh VM boot + ~60s VirGL warmup + injection + validation).',
+  '- Because it is backgrounded, poll its output every ~30s until it prints a line beginning with "RESULT:".',
+  '  Do NOT give up early; wait for the RESULT line or for the process to exit (allow up to ~22 min).',
+  '',
+  'BEFORE running, confirm the macOS screen is UNLOCKED:',
+  '  python3 -c "import Quartz;d=Quartz.CGSessionCopyCurrentDictionary();print(\'LOCKED\' if (d and d.get(\'CGSSessionScreenIsLocked\')) else \'UNLOCKED\')"',
+  '  If it prints LOCKED, do NOT run; return pass=false, reason="aborted: macOS screen is locked (Parallels drops injected keys)".',
+  '',
+  'PARSING THE OUTCOME (be strictly honest):',
+  '- pass = true ONLY if the final line is exactly "RESULT: PASS".',
+  '- If "RESULT: FAIL: <reason>", pass=false and reason = the text after "RESULT: FAIL:".',
+  '- If no RESULT line is ever printed, pass=false and reason="no RESULT line emitted".',
+  '- injectionMs = the integer from the smoke log line "double-tap injection wall-time: <N>ms" (look in the backgrounded output / the run dir); -1 if not found. (>350ms means the double-tap likely missed its 400ms window.)',
+  '- launcherOpened = true if the run evidence/serial shows the launcher opened (e.g. grep the run dir / serial for "[spawn] path=\'/bin/blauncher\'").',
+  '- evidencePath = the "evidence_dir=" value from the run\'s result.txt (under /Users/wrb/fun/code/breenix/logs/parallels-launcher-test/run-<ts>/); empty string if unknown.',
+  '',
+  'Never report pass=true on "launcher opened" or "process created" alone — only on the exact "RESULT: PASS" line.',
+  'Do NOT run multiple VMs in parallel; this single run owns the one Parallels VM. Do NOT modify any files.',
+].join('\n'); // collapse the entries into a single newline-separated prompt string
+
+phase('Gate'); // runtime-provided call; presumably marks entry into the 'Gate' phase listed in meta.phases — confirm against the workflow runtime
+
+let consecutive = 0; // current run of back-to-back PASSes; reset to 0 on any FAIL
+let greenStreakMax = 0; // longest PASS streak observed so far
+let attempts = 0; // index of the last attempt started (1-based)
+let firstFailure = null; // details of the first FAIL, or null while none has occurred
+let lastEvidenceDir = ''; // most recent non-empty evidencePath reported by an attempt
+const perAttempt = []; // one summary record per attempt, in execution order
+
+for (let i = 1; i <= MAX_ATTEMPTS; i++) {
+  attempts = i;
+  log('Attempt ' + i + '/' + MAX_ATTEMPTS + ' — consecutive-green streak: ' + consecutive + '/' + TARGET_STREAK);
+
+  const result = await agent(ATTEMPT_PROMPT, { schema: ATTEMPT_SCHEMA, label: 'attempt-' + i, phase: 'Gate' });
+
+  const r = result || { pass: false, reason: 'agent returned null', injectionMs: -1, launcherOpened: false, evidencePath: '' };
+  perAttempt.push({ attempt: i, pass: r.pass, reason: r.reason, injectionMs: r.injectionMs, launcherOpened: r.launcherOpened });
+  if (r.evidencePath) {
+    lastEvidenceDir = r.evidencePath;
+  }
+
+  if (r.pass) {
+    consecutive = consecutive + 1;
+    if (consecutive > greenStreakMax) {
+      greenStreakMax = consecutive;
+    }
+    log('Attempt ' + i + ' PASS — streak now ' + consecutive + '/' + TARGET_STREAK + ' (inject ' + r.injectionMs + 'ms)');
+    if (consecutive >= TARGET_STREAK) {
+      log('Reached ' + TARGET_STREAK + ' consecutive green; stopping after ' + i + ' attempts.');
+      break;
+    }
+  } else {
+    if (firstFailure === null) {
+      firstFailure = { attempt: i, reason: r.reason, injectionMs: r.injectionMs, launcherOpened: r.launcherOpened, evidencePath: r.evidencePath };
+    }
+    log('Attempt ' + i + ' FAIL (' + r.reason + ') — streak reset ' + consecutive + ' -> 0; continuing to measure flakiness.');
+    consecutive = 0;
+  }
+}
+
+const consecutiveGreenAchieved = greenStreakMax >= TARGET_STREAK;
+log('Done. attempts=' + attempts + ' greenStreakMax=' + greenStreakMax + ' consecutiveGreenAchieved=' + consecutiveGreenAchieved);
+
+return {
+  consecutiveGreenAchieved: consecutiveGreenAchieved,
+  greenStreakMax: greenStreakMax,
+  attempts: attempts,
+  firstFailure: firstFailure,
+  perAttempt: perAttempt,
+  evidenceDir: lastEvidenceDir,
+};
diff --git a/docs/planning/aarch64-launcher-spawn-crash/ROOT_CAUSE.md b/docs/planning/aarch64-launcher-spawn-crash/ROOT_CAUSE.md
@@ -0,0 +1,102 @@
+# AArch64 launcher-spawn intermittent crash — root cause + fix proposal
+
+**Status (2026-06-02):** Root cause CONFIRMED (high confidence on the proximate
+mechanism; medium on the exact upstream writer). **The fix touches gold-master code
+and awaits operator signoff** — see "Fix options" and the autopsy caveat. Found by
+the automated Parallels launcher-test harness (PR #411).
+
+## Symptom
+Intermittently, on the launcher→terminal path, a CPU takes an unhandled sync
+exception at a **page-aligned kernel data address**:
+- `[UNHANDLED_EC] cpu=N EC=0x0 ELR=0xffff000040269000` (ESR=0x2000000, "Unknown"), or
+- (earlier) `EC=0xe ELR=0xffff00004025d000` (Illegal Execution State).
+
+The default handler parks/redirects the CPU, so heartbeats continue (looks
+"hung"). Rate in an 18-run sweep: **2 EC=0x0 crashes / 18** (~11%); also 4/18
+double-tap input drops (a separate bug). EC=0x0 happened to be survivable
+(launcher still PASSed); EC=0xe was fatal to the run.
+
+## Proximate cause — CONFIRMED
+The captured `[FATAL_REGS]` register file **is verbatim `idle_loop_arm64`'s
+mid-loop state**, decisively symbolized against `kernel-aarch64` (base
+`0xffff000040000000`):
+
+| reg | value | symbol |
+|---|---|---|
+| elr (fault PC) | `0x269000` | `scheduler::WAKE_SITE_SCHEDULE` (= `__bss_start`), held in idle's `x21` |
+| x30, x22 | `0x269070` | `scheduler::NEED_RESCHED`, idle's `x22` |
+| x1 | `0x269080` | `scheduler::CPU_IS_IDLE` |
+| x26 | `0x0d7498` | `idle_loop_arm64+0x60` (idle loop body) |
+| ctx_elr_el1 / peers' DEFER_SNAP elr | `0x0d5368` | `schedule_from_kernel+0xfc0` (normal "parked in scheduler" PC) |
+
+`idle_loop_arm64`'s prologue loads `x21=WAKE_SITE_SCHEDULE(0x269000)` and
+`x22=NEED_RESCHED(0x269070)`. The fault frame's `elr == idle.x21` and
+`x30==x22==idle.x22` — i.e. **a non-idle thread's `Thread.context` was overwritten
+with idle's register file** (including `elr_el1 = 0x269000`). When that thread is
+later dispatched, `restore_*_context_inline` copies `frame.elr =
+thread.context.elr_el1 = 0x269000` and `aarch64_enter_exception_frame` ERETs there.
+`0x269000` is `.bss` (zeroed) → `0x00000000` decodes to `UDF #0` → **EC=0x0**.
+If instead the corrupt SPSR encodes an illegal return state, the ERET sets `PSTATE.IL` and the first instruction at the target faults → **EC=0xe**. Same bug.
+
+**Why the existing dispatch guard misses it:** `dispatch_thread_locked` checks
+only `frame.elr < 0x1000 || (frame.spsr & 0xF) != 0`. The corrupt `elr` is far above
+`0x1000` and (for an EL0t dispatch) `(spsr & 0xF) == 0`, so the corrupt context passes.
+
+## Upstream cause — candidates (medium confidence)
+Both reduce to *idle's register file ending up in a non-idle thread's `context`*:
+1. **cpu_state / `old_id` save-target skew.** If `cpu_state[cpu].current_thread`
+   names a userspace thread while the CPU was actually running `idle_loop_arm64`
+   (e.g. after a ret-based idle dispatch that `br`s to idle without rebuilding
+   cpu_state, then a timer IRQ), `save_*_context_inline(userspace_thread,
+   idle_frame)` writes idle's regs into that thread's context. `fix_eret_cpu_state_locked`
+   is the existing band-aid but only fires for EL0 frames.
+2. **Reused fork kernel stack carrying a stale frame** (commit `04c9655a`,
+   bitmap-backed kstack reuse; the fault SP is in that region) — a child whose
+   reused kstack still holds a prior idle/scheduler exception frame.
+
+Implicated machinery is exactly what the branch's cluster reshaped: `04c9655a`
+(fork kstack reuse), `969ecce2` (CLONE_VM exec), `90a971ce` (stale cached TTBR0
+requeue). Likely a **residual cpu_state/stack-ownership skew** from that cluster,
+not a fresh regression — and almost certainly the same root behind the operator's
+original launcher→terminal lockup and the prior ~week-long crash hunt
+(`ELR=0x8`/`0x1e`/`0x3b9aca00`/`EC=0x18` were the same corridor).
+
+## Fix options (BOTH are gold-master → operator signoff required)
+1. **Root fix (preferred): stop the bad save.** Correct the save-target selection
+   in `check_need_resched_and_switch_arm64` / `save_*_context_inline` so idle's
+   register file is never saved into a non-idle thread's context (fix the
+   cpu_state/`old_id` skew, or the reused-stack stale frame). Requires pinning down
+   which of the two writers is responsible — see "Confirm the writer" below.
+2. **Defense-in-depth: privilege-aware dispatch guard.** Reject any dispatch where
+   `frame.elr` is inconsistent with the target EL (EL0 dispatch → elr must be a
+   userspace VA, not a kernel VA; EL1 dispatch → elr must be in `.text`), and
+   safely terminate/requeue the victim instead of ERETing into data.
+   **⚠ AUTOPSY CAVEAT:** `context_switch.rs` is gold-master and the autopsy
+   (`docs/planning/cpu0-user-guard-autopsy/README.md`) explicitly warns **"NO
+   CPU0-specific EL0 dispatch guard"** — a dispatch guard here caused a week-long
+   regression (PR #334). This option intersects that frozen concern and must be
+   designed + reviewed with the autopsy in hand. It mitigates + diagnoses but does
+   not fix the upstream save-skew.
+
+## Confirm the writer (needed before the root fix)
+This crash is **Parallels-only** (BWM/VirGL), so the QEMU GDB workflow cannot reach
+it. Confirmation must be in-kernel + Parallels repro:
+- Add a **lock-free trace event** (or a small per-CPU ring) at the save site
+  recording `(old_id, executing-is-idle, cpu_state.current_thread, cpu)` — to
+  prove the save-target skew directly. **This touches the gold-master save path →
+  signoff.** Then reproduce via the launcher harness and read the capture.
+- The enhanced postmortem (`[FATAL_REGS]`/`[FATAL_THREAD]`, committed `b1961217`,
+  exception.rs — not gold-master) already proves the proximate cause; extend it
+  with `cpu_state` at fault if a cheaper signal is wanted.
+
+## How to validate a fix
+Run the launcher harness gate (`scripts/parallels/launcher-smoke.sh` /
+`.claude/workflows/parallels-launcher-test.js`) — the EC=0x0/EC=0xe crashes must
+disappear across a multi-run sweep. The harness already reports kernel faults
+distinctly (`RESULT: FAIL: KERNEL FAULT ...`).
+
+## Evidence
+- `logs/parallels-launcher-test/run-20260602-202819/run-sh.log` (EC=0x0 + full
+  `[FATAL_REGS]`/`[FATAL_THREAD]`/trace ring), `run-20260602-204127` (2nd capture),
+  and the earlier EC=0xe `run-20260602-124137`.
+- Enhanced postmortem: commit `b1961217` (exception.rs).
diff --git a/docs/planning/parallels-test-harness/RALPH_STATE.md b/docs/planning/parallels-test-harness/RALPH_STATE.md
@@ -0,0 +1,70 @@
+# Parallels Launcher-Test Harness — Ralph State
+
+**Goal (operator, 2026-06-01):** Build an automated testing framework that drives the
+real GUI input path inside Parallels — simulate the launcher gesture, open the launcher,
+launch the terminal, type into it, and validate it works — so we can test at scale.
+
+**Exit criteria (hard):** the `parallels-launcher-test` workflow reports
+`consecutiveGreenAchieved = true` — **10 consecutive green runs** of
+gesture → launcher opens → select terminal → Enter → `/bin/bterm` launches, validated.
+
+## Loop protocol (sequential Ralph)
+Each turn = **implement/fix the framework, then validate with 10 consecutive runs.**
+Stop the loop only when 10-in-a-row pass. Diagnose failures honestly — if a failure is a
+real Breenix launcher bug (not a harness timing issue), surface it; do not weaken the test.
+
+## Status
+- **Phase 1 — ship branch: DONE.** `fix/aarch64-stale-cached-ttbr0-dispatch` → PR #410 → merged to `main` (`134c532b`). Local `main` synced.
+- **Phase 2 — construction workflow: COMPLETED, blocked at spike.** Run `wf_c890dfff-d68`.
+  - Boot ✅ VM `breenix-1780359459`, BWM compositing. Ready marker: `[bwm] hotkeys: using built-in defaults for early boot`.
+  - Code-recon ✅ Full recipe known: trigger=double-tap Super (`bwm.rs:315`); `blauncher` pre-selects `APPS[0]="Terminal"` → Enter alone launches `/bin/bterm`. Oracles: `[spawn] path='/bin/blauncher'`, `[spawn] path='/bin/bterm'`, `[bterm] config:`.
+  - Spike ❌ **HARD host-side blocker:** `prlctl send-key-event` accepted but keystrokes DROPPED before the guest (modifier-free `=` into focused window changed nothing; no hotkey `[spawn]`). Evidence points to missing macOS TCC Accessibility/Input-Monitoring for Parallels + a detached VM GUI view (stale `prlctl capture`). Spike wrote `logs/parallels-launcher-test/inject.sh` + evidence.
+  - **OPEN QUESTION (since resolved — see VERDICT below):** is the blocker the detached/headless window (autonomously fixable) or a TCC grant (needs operator)? Decisive test: bring the VM window on-screen and focused, inject `=` into Bounce, and watch for a speed change.
+
+## VERDICT (2026-06-01 night)
+- **Harness: BUILT & verified.** `scripts/parallels/inject.sh`, `scripts/parallels/launcher-smoke.sh`, `.claude/workflows/parallels-launcher-test.js`, `docs/planning/parallels-test-harness/README.md`. Injection method isolated to one config block (`SUPER_PREFIX=224 SUPER_CODE=91 INTER_TAP_MS=150 ENTER_CODE=28`).
+- **Parallels injection blocker ROOT-CAUSED: the macOS screen is LOCKED.** `CGSSessionScreenIsLocked=True` → VM console detached → `prlctl send-key-event` accepted (rc=0) but silently dropped (functional `=`-into-Bounce test: no effect; no hotkey `[spawn]`). NOT a TCC grant (send-key-event injects into the virtual XHCI HID via prl_disp_service, not via macOS CGEvent/PostEvent). NOT a run.sh misconfig. Guest USB keyboard is healthy/enumerated — input just never lands. Evidence: `logs/parallels-launcher-test/unblock-2026-06-01-rootcause.txt`.
+- **OPERATOR ACTION to validate on Parallels:** physically unlock the Mac at the console, then `caffeinate -d &` (prevent re-lock), then run `bash scripts/parallels/launcher-smoke.sh` (or the `parallels-launcher-test` workflow). There is no non-interactive unlock bypass.
+
+## QEMU logic-validation pivot — EVALUATED, NOT VIABLE
+We considered QEMU as a lock-independent alternative (QEMU injects keys via its own
+monitor, not macOS events). It does **not** work for this flow, for two independent reasons:
+- **BWM never starts on QEMU** — BWM's ARM64 path needs the VirGL 3D compositor
+  (Parallels-specific; absent on QEMU here), so the window manager never comes up.
+- **SUPER never observed on QEMU** — the double-tap-Super hotkey reads `SUPER_PRESSED`
+  only from the USB-HID/xHCI driver, which never enumerates on QEMU. The `virtio-keyboard`
+  MMIO driver never tracks Super, so the gesture can't be recognized.
+Making QEMU viable would require kernel changes (software-compositor fallback for BWM +
+a `virtio-keyboard`→SUPER bridge) — out of scope for this host-side harness.
+For reference, the working QEMU ARM64 boot recipe is `-M virt,gic-version=3 -cpu max`
+(run.sh's `cortex-a72` hangs); run.sh exposes a monitor on `tcp:127.0.0.1:4444` + QMP at
+`/tmp/breenix-qmp.sock`.
+**Conclusion: the 10× validation must run on Parallels with an unlocked Mac. No QEMU substitute.**
+
+## Architecture decisions (resolved this session)
+- **Trigger is double-tap SUPER, not double-Control.** `bwm.rs` `load_defaults()` (aarch64,
+  hardcoded; config loading is x86-only) binds `SUPER+SUPER (taps=2) → exec /bin/blauncher`
+  and `SUPER+Return → exec /bin/bterm`. The operator's "double control key" = the
+  double-tap-Super gesture (Mac Command maps to guest Super). We test the launcher path.
+- **Injection = `prlctl send-key-event <VM> --scancode <ps2-set1> --event press|release`**
+  (NOT CGEvents — no Accessibility/focus needed; Parallels translates set-1 → guest USB-HID).
+  ASCII proven in `scripts/parallels/type-in-vm.sh`. Super = extended `0xE0 0x5B` (224 then 91)
+  — exact prlctl form determined empirically by the spike phase.
+- **Validation = serial markers (primary) + `scripts/parallels/capture-display.sh` PIL pixel
+  probe (secondary).** PASS requires real evidence `/bin/bterm` launched — never "process created".
+- **VM lifecycle:** only via `./run.sh --parallels [--no-build]` (fresh epoch VM, tails serial
+  forever → background it; serial at `/tmp/breenix-parallels-serial.log`; ~60-90s VirGL warmup
+  before capture is trustworthy).
+
+## Deliverables
+- `scripts/parallels/launcher-smoke.sh` — one full run → `RESULT: PASS|FAIL` + evidence.
+- `.claude/workflows/parallels-launcher-test.js` — runs the smoke script sequentially up to
+  15×, requires 10 consecutive PASS, reports the streak + first failure.
+- `docs/planning/parallels-test-harness/README.md` — the proven recipe + how-to.
+- Evidence under `logs/parallels-launcher-test/`.
+
+## Next action when the construction workflow completes
+- `ok=true` → invoke the `parallels-launcher-test` workflow for the 10× gate.
+- failed at Boot/Spike → diagnose (injection timing vs. real Breenix launcher bug),
+  fix host-side or report the Breenix bug, then re-run.
+- After 10 green → commit the harness on a feature branch, open a PR, notify operator.