diff --git a/.claude/workflows/parallels-launcher-test.js b/.claude/workflows/parallels-launcher-test.js new file mode 100644 index 00000000..a3477f4f --- /dev/null +++ b/.claude/workflows/parallels-launcher-test.js @@ -0,0 +1,102 @@ +export const meta = { + name: 'parallels-launcher-test', + description: 'Drive the Breenix launcher->terminal smoke test on a fresh Parallels VM, sequentially (one VM, never parallel), measuring the consecutive-green streak until 10-in-a-row or 15 attempts.', + phases: [ + { title: 'Gate', detail: 'Run launcher-smoke.sh --no-build up to 15 times sequentially; stop early at a 10-consecutive-PASS streak.' }, + ], +}; + +const MAX_ATTEMPTS = 15; +const TARGET_STREAK = 10; + +const ATTEMPT_SCHEMA = { + type: 'object', + additionalProperties: false, + properties: { + pass: { type: 'boolean', description: 'true ONLY if the script printed exactly "RESULT: PASS"' }, + reason: { type: 'string', description: 'For a FAIL, the text after "RESULT: FAIL:"; for a PASS, "ok".' }, + injectionMs: { type: 'integer', description: 'The double-tap injection wall-time in ms from the smoke log line "double-tap injection wall-time: ms", or -1 if not found.' }, + launcherOpened: { type: 'boolean', description: 'true if the serial/evidence shows the launcher opened this run.' }, + evidencePath: { type: 'string', description: 'Absolute path to the run-* evidence dir (from result.txt evidence_dir=), or empty string.' 
}, + }, + required: ['pass', 'reason', 'evidencePath'], +}; + +const ATTEMPT_PROMPT = [ + 'Run the Breenix launcher->terminal smoke test ONCE and report the structured outcome.', + '', + 'HOW TO RUN (mandatory):', + '- Use the Bash tool with dangerouslyDisableSandbox:true AND run_in_background:true.', + '- Command (note --no-build: artifacts already exist; a per-run rebuild is wrong and wasteful):', + ' bash /Users/wrb/fun/code/breenix/scripts/parallels/launcher-smoke.sh --no-build', + '- A single run takes ~6-10 min (fresh VM boot + ~60s VirGL warmup + injection + validation).', + '- Because it is backgrounded, poll its output every ~30s until it prints a line beginning with "RESULT:".', + ' Do NOT give up early; wait for the RESULT line or for the process to exit (allow up to ~22 min).', + '', + 'BEFORE running, confirm the macOS screen is UNLOCKED:', + ' python3 -c "import Quartz;d=Quartz.CGSessionCopyCurrentDictionary();print(\'LOCKED\' if (d and d.get(\'CGSSessionScreenIsLocked\')) else \'UNLOCKED\')"', + ' If it prints LOCKED, do NOT run; return pass=false, reason="aborted: macOS screen is locked (Parallels drops injected keys)".', + '', + 'PARSING THE OUTCOME (be strictly honest):', + '- pass = true ONLY if the final line is exactly "RESULT: PASS".', + '- If "RESULT: FAIL: ", pass=false and reason = the text after "RESULT: FAIL:".', + '- If no RESULT line is ever printed, pass=false and reason="no RESULT line emitted".', + '- injectionMs = the integer from the smoke log line "double-tap injection wall-time: ms" (look in the backgrounded output / the run dir); -1 if not found. (>350ms means the double-tap likely missed its 400ms window.)', + '- launcherOpened = true if the run evidence/serial shows the launcher opened (e.g. 
grep the run dir / serial for "[spawn] path=\'/bin/blauncher\'").', + '- evidencePath = the "evidence_dir=" value from the run\'s result.txt (under /Users/wrb/fun/code/breenix/logs/parallels-launcher-test/run-/); empty string if unknown.', + '', + 'Never report pass=true on "launcher opened" or "process created" alone — only on the exact "RESULT: PASS" line.', + 'Do NOT run multiple VMs in parallel; this single run owns the one Parallels VM. Do NOT modify any files.', +].join('\n'); + +phase('Gate'); + +let consecutive = 0; +let greenStreakMax = 0; +let attempts = 0; +let firstFailure = null; +let lastEvidenceDir = ''; +const perAttempt = []; + +for (let i = 1; i <= MAX_ATTEMPTS; i++) { + attempts = i; + log('Attempt ' + i + '/' + MAX_ATTEMPTS + ' — consecutive-green streak: ' + consecutive + '/' + TARGET_STREAK); + + const result = await agent(ATTEMPT_PROMPT, { schema: ATTEMPT_SCHEMA, label: 'attempt-' + i, phase: 'Gate' }); + + const r = result || { pass: false, reason: 'agent returned null', injectionMs: -1, launcherOpened: false, evidencePath: '' }; + perAttempt.push({ attempt: i, pass: r.pass, reason: r.reason, injectionMs: r.injectionMs, launcherOpened: r.launcherOpened }); + if (r.evidencePath) { + lastEvidenceDir = r.evidencePath; + } + + if (r.pass) { + consecutive = consecutive + 1; + if (consecutive > greenStreakMax) { + greenStreakMax = consecutive; + } + log('Attempt ' + i + ' PASS — streak now ' + consecutive + '/' + TARGET_STREAK + ' (inject ' + r.injectionMs + 'ms)'); + if (consecutive >= TARGET_STREAK) { + log('Reached ' + TARGET_STREAK + ' consecutive green; stopping after ' + i + ' attempts.'); + break; + } + } else { + if (firstFailure === null) { + firstFailure = { attempt: i, reason: r.reason, injectionMs: r.injectionMs, launcherOpened: r.launcherOpened, evidencePath: r.evidencePath }; + } + log('Attempt ' + i + ' FAIL (' + r.reason + ') — streak reset ' + consecutive + ' -> 0; continuing to measure flakiness.'); + consecutive = 0; + } +} + 
+const consecutiveGreenAchieved = greenStreakMax >= TARGET_STREAK; +log('Done. attempts=' + attempts + ' greenStreakMax=' + greenStreakMax + ' consecutiveGreenAchieved=' + consecutiveGreenAchieved); + +return { + consecutiveGreenAchieved: consecutiveGreenAchieved, + greenStreakMax: greenStreakMax, + attempts: attempts, + firstFailure: firstFailure, + perAttempt: perAttempt, + evidenceDir: lastEvidenceDir, +}; diff --git a/docs/planning/aarch64-launcher-spawn-crash/ROOT_CAUSE.md b/docs/planning/aarch64-launcher-spawn-crash/ROOT_CAUSE.md new file mode 100644 index 00000000..db42ec3c --- /dev/null +++ b/docs/planning/aarch64-launcher-spawn-crash/ROOT_CAUSE.md @@ -0,0 +1,102 @@ +# AArch64 launcher-spawn intermittent crash — root cause + fix proposal + +**Status (2026-06-02):** Root cause CONFIRMED (high confidence on the proximate +mechanism; medium on the exact upstream writer). **Fix is gold-master and awaits +operator signoff** — see "Fix options" + the autopsy caveat. Found by the +automated Parallels launcher-test harness (PR #411). + +## Symptom +Intermittently, on the launcher→terminal path, a CPU takes an unhandled sync +exception at a **page-aligned kernel data address**: +- `[UNHANDLED_EC] cpu=N EC=0x0 ELR=0xffff000040269000` (ESR=0x2000000, "Unknown"), or +- (earlier) `EC=0xe ELR=0xffff00004025d000` (Illegal Execution State). + +The default handler parks/redirects the CPU, so heartbeats continue (looks +"hung"). Rate in an 18-run sweep: **2 EC=0x0 crashes / 18** (~11%); also 4/18 +double-tap input drops (a separate bug). EC=0x0 happened to be survivable +(launcher still PASSed); EC=0xe was fatal to the run. 
+ +## Proximate cause — CONFIRMED +The captured `[FATAL_REGS]` register file **is verbatim `idle_loop_arm64`'s +mid-loop state**, decisively symbolized against `kernel-aarch64` (base +`0xffff000040000000`): + +| reg | value | symbol | +|---|---|---| +| elr (fault PC) | `0x269000` | `scheduler::WAKE_SITE_SCHEDULE` (= `__bss_start`), held in idle's `x21` | +| x30, x22 | `0x269070` | `scheduler::NEED_RESCHED`, idle's `x22` | +| x1 | `0x269080` | `scheduler::CPU_IS_IDLE` | +| x26 | `0x0d7498` | `idle_loop_arm64+0x60` (idle loop body) | +| ctx_elr_el1 / peers' DEFER_SNAP elr | `0x0d5368` | `schedule_from_kernel+0xfc0` (normal "parked in scheduler" PC) | + +`idle_loop_arm64`'s prologue loads `x21=WAKE_SITE_SCHEDULE(0x269000)` and +`x22=NEED_RESCHED(0x269070)`. The fault frame's `elr == idle.x21` and +`x30==x22==idle.x22` — i.e. **a non-idle thread's `Thread.context` was overwritten +with idle's register file** (including `elr_el1 = 0x269000`). When that thread is +later dispatched, `restore_*_context_inline` copies `frame.elr = +thread.context.elr_el1 = 0x269000` and `aarch64_enter_exception_frame` ERETs there. +`0x269000` is `.bss` (zeroed) → `0x00000000` decodes to `UDF #0` → **EC=0x0**. +If instead the corrupt SPSR is illegal, the ERET itself faults → **EC=0xe**. Same bug. + +**Why the existing dispatch guard misses it:** `dispatch_thread_locked` checks +only `frame.elr < 0x1000 || (frame.spsr & 0xF) != 0`. `0x269000 ≥ 0x1000` and (for +an EL0t dispatch) `spsr & 0xF == 0`, so the corrupt context passes. + +## Upstream cause — candidates (medium confidence) +Both reduce to *idle's register file ending up in a non-idle thread's `context`*: +1. **cpu_state / `old_id` save-target skew.** If `cpu_state[cpu].current_thread` + names a userspace thread while the CPU was actually running `idle_loop_arm64` + (e.g. 
after a ret-based idle dispatch that `br`s to idle without rebuilding + cpu_state, then a timer IRQ), `save_*_context_inline(userspace_thread, + idle_frame)` writes idle's regs into that thread's context. `fix_eret_cpu_state_locked` + is the existing band-aid but only fires for EL0 frames. +2. **Reused fork kernel stack carrying a stale frame** (commit `04c9655a`, + bitmap-backed kstack reuse; the fault SP is in that region) — a child whose + reused kstack still holds a prior idle/scheduler exception frame. + +Implicated machinery is exactly what the branch's cluster reshaped: `04c9655a` +(fork kstack reuse), `969ecce2` (CLONE_VM exec), `90a971ce` (stale cached TTBR0 +requeue). Likely a **residual cpu_state/stack-ownership skew** from that cluster, +not a fresh regression — and almost certainly the same root behind the operator's +original launcher→terminal lockup and the prior ~week-long crash hunt +(`ELR=0x8`/`0x1e`/`0x3b9aca00`/`EC=0x18` were the same corridor). + +## Fix options (BOTH are gold-master → operator signoff required) +1. **Root fix (preferred): stop the bad save.** Correct the save-target selection + in `check_need_resched_and_switch_arm64` / `save_*_context_inline` so idle's + register file is never saved into a non-idle thread's context (fix the + cpu_state/`old_id` skew, or the reused-stack stale frame). Requires pinning + which of the two writers — see "Confirm the writer" below. +2. **Defense-in-depth: privilege-aware dispatch guard.** Reject any dispatch where + `frame.elr` is inconsistent with the target EL (EL0 dispatch → elr must be a + userspace VA, not a kernel VA; EL1 dispatch → elr must be in `.text`), and + safely terminate/requeue the victim instead of ERETing into data. + **⚠ AUTOPSY CAVEAT:** `context_switch.rs` is gold-master and the autopsy + (`docs/planning/cpu0-user-guard-autopsy/README.md`) explicitly warns **"NO + CPU0-specific EL0 dispatch guard"** — a dispatch guard here caused a week-long + regression (PR #334). 
This option intersects that frozen concern and must be + designed + reviewed with the autopsy in hand. It mitigates + diagnoses but does + not fix the upstream save-skew. + +## Confirm the writer (needed before the root fix) +This crash is **Parallels-only** (BWM/VirGL), so the QEMU GDB workflow cannot reach +it. Confirmation must be in-kernel + Parallels repro: +- Add a **lock-free trace event** (or a small per-CPU ring) at the save site + recording `(old_id, executing-is-idle, cpu_state.current_thread, cpu)` — to + prove the save-target skew directly. **This touches the gold-master save path → + signoff.** Then reproduce via the launcher harness and read the capture. +- The enhanced postmortem (`[FATAL_REGS]`/`[FATAL_THREAD]`, committed `b1961217`, + exception.rs — not gold-master) already proves the proximate cause; extend it + with `cpu_state` at fault if a cheaper signal is wanted. + +## How to validate a fix +Run the launcher harness gate (`scripts/parallels/launcher-smoke.sh` / +`.claude/workflows/parallels-launcher-test.js`) — the EC=0x0/EC=0xe crashes must +disappear across a multi-run sweep. The harness already reports kernel faults +distinctly (`RESULT: FAIL: KERNEL FAULT ...`). + +## Evidence +- `logs/parallels-launcher-test/run-20260602-202819/run-sh.log` (EC=0x0 + full + `[FATAL_REGS]`/`[FATAL_THREAD]`/trace ring), `run-20260602-204127` (2nd capture), + and the earlier EC=0xe `run-20260602-124137`. +- Enhanced postmortem: commit `b1961217` (exception.rs). 
diff --git a/docs/planning/parallels-test-harness/RALPH_STATE.md b/docs/planning/parallels-test-harness/RALPH_STATE.md new file mode 100644 index 00000000..fac7be96 --- /dev/null +++ b/docs/planning/parallels-test-harness/RALPH_STATE.md @@ -0,0 +1,70 @@ +# Parallels Launcher-Test Harness — Ralph State + +**Goal (operator, 2026-06-01):** Build an automated testing framework that drives the +real GUI input path inside Parallels — simulate the launcher gesture, open the launcher, +launch the terminal, type into it, and validate it works — so we can test at scale. + +**Exit criteria (hard):** the `parallels-launcher-test` workflow reports +`consecutiveGreenAchieved = true` — **10 consecutive green runs** of +gesture → launcher opens → select terminal → Enter → `/bin/bterm` launches, validated. + +## Loop protocol (sequential Ralph) +Each turn = **implement/fix the framework, then validate with 10 consecutive runs.** +Stop the loop only when 10-in-a-row pass. Diagnose failures honestly — if a failure is a +real Breenix launcher bug (not a harness timing issue), surface it; do not weaken the test. + +## Status +- **Phase 1 — ship branch: DONE.** `fix/aarch64-stale-cached-ttbr0-dispatch` → PR #410 → merged to `main` (`134c532b`). Local `main` synced. +- **Phase 2 — construction workflow: COMPLETED, blocked at spike.** Run `wf_c890dfff-d68`. + - Boot ✅ VM `breenix-1780359459`, BWM compositing. Ready marker: `[bwm] hotkeys: using built-in defaults for early boot`. + - Code-recon ✅ Full recipe known: trigger=double-tap Super (`bwm.rs:315`); `blauncher` pre-selects `APPS[0]="Terminal"` → Enter alone launches `/bin/bterm`. Oracles: `[spawn] path='/bin/blauncher'`, `[spawn] path='/bin/bterm'`, `[bterm] config:`. + - Spike ❌ **HARD host-side blocker:** `prlctl send-key-event` accepted but keystrokes DROPPED before the guest (modifier-free `=` into focused window changed nothing; no hotkey `[spawn]`). 
Evidence points to missing macOS TCC Accessibility/Input-Monitoring for Parallels + a detached VM GUI view (stale `prlctl capture`). Spike wrote `logs/parallels-launcher-test/inject.sh` + evidence. + - **OPEN QUESTION being resolved:** is the blocker the detached/headless window (autonomously fixable) or a TCC grant (needs operator)? Decisive test: bring VM window on-screen+focused, inject `=` into Bounce, watch speed. + +## VERDICT (2026-06-01 night) +- **Harness: BUILT & verified.** `scripts/parallels/inject.sh`, `scripts/parallels/launcher-smoke.sh`, `.claude/workflows/parallels-launcher-test.js`, `docs/planning/parallels-test-harness/README.md`. Injection method isolated to one config block (`SUPER_PREFIX=224 SUPER_CODE=91 INTER_TAP_MS=150 ENTER_CODE=28`). +- **Parallels injection blocker ROOT-CAUSED: the macOS screen is LOCKED.** `CGSSessionScreenIsLocked=True` → VM console detached → `prlctl send-key-event` accepted (rc=0) but silently dropped (functional `=`-into-Bounce test: no effect; no hotkey `[spawn]`). NOT a TCC grant (send-key-event injects into the virtual XHCI HID via prl_disp_service, not via macOS CGEvent/PostEvent). NOT a run.sh misconfig. Guest USB keyboard is healthy/enumerated — input just never lands. Evidence: `logs/parallels-launcher-test/unblock-2026-06-01-rootcause.txt`. +- **OPERATOR ACTION to validate on Parallels:** physically unlock the Mac at the console, then `caffeinate -d &` (prevent re-lock), then run `bash scripts/parallels/launcher-smoke.sh` (or the `parallels-launcher-test` workflow). There is no non-interactive unlock bypass. + +## QEMU logic-validation pivot — EVALUATED, NOT VIABLE +We considered QEMU as a lock-independent alternative (QEMU injects keys via its own +monitor, not macOS events). It does **not** work for this flow, for two independent reasons: +- **BWM never starts on QEMU** — BWM's ARM64 path needs the VirGL 3D compositor + (Parallels-specific; absent on QEMU here), so the window manager never comes up. 
+- **SUPER never observed on QEMU** — the double-tap-Super hotkey reads `SUPER_PRESSED` + only from the USB-HID/xHCI driver, which never enumerates on QEMU. The `virtio-keyboard` + MMIO driver never tracks Super, so the gesture can't be recognized. +Making QEMU viable would require kernel changes (software-compositor fallback for BWM + +a `virtio-keyboard`→SUPER bridge) — out of scope for this host-side harness. +For reference, the working QEMU ARM64 boot recipe is `-M virt,gic-version=3 -cpu max` +(run.sh's `cortex-a72` hangs); run.sh exposes a monitor on `tcp:127.0.0.1:4444` + QMP at +`/tmp/breenix-qmp.sock`. +**Conclusion: the 10× validation must run on Parallels with an unlocked Mac. No QEMU substitute.** + +## Architecture decisions (resolved this session) +- **Trigger is double-tap SUPER, not double-Control.** `bwm.rs` `load_defaults()` (aarch64, + hardcoded; config loading is x86-only) binds `SUPER+SUPER (taps=2) → exec /bin/blauncher` + and `SUPER+Return → exec /bin/bterm`. The operator's "double control key" = the + double-tap-Super gesture (Mac Command maps to guest Super). We test the launcher path. +- **Injection = `prlctl send-key-event <vm> --scancode <code> --event press|release`** + (NOT CGEvents — no Accessibility/focus needed; Parallels translates set-1 → guest USB-HID). + ASCII proven in `scripts/parallels/type-in-vm.sh`. Super = extended `0xE0 0x5B` (224 then 91) + — exact prlctl form determined empirically by the spike phase. +- **Validation = serial markers (primary) + `scripts/parallels/capture-display.sh` PIL pixel + probe (secondary).** PASS requires real evidence `/bin/bterm` launched — never "process created". +- **VM lifecycle:** only via `./run.sh --parallels [--no-build]` (fresh epoch VM, tails serial + forever → background it; serial at `/tmp/breenix-parallels-serial.log`; ~60-90s VirGL warmup + before capture is trustworthy). + +## Deliverables +- `scripts/parallels/launcher-smoke.sh` — one full run → `RESULT: PASS|FAIL` + evidence. 
+- `.claude/workflows/parallels-launcher-test.js` — runs the smoke script sequentially up to + 15×, requires 10 consecutive PASS, reports the streak + first failure. +- `docs/planning/parallels-test-harness/README.md` — the proven recipe + how-to. +- Evidence under `logs/parallels-launcher-test/`. + +## Next action when the construction workflow completes +- `ok=true` → invoke the `parallels-launcher-test` workflow for the 10× gate. +- failed at Boot/Spike → diagnose (injection timing vs. real Breenix launcher bug), + fix host-side or report the Breenix bug, then re-run. +- After 10 green → commit the harness on a feature branch, open a PR, notify operator. diff --git a/docs/planning/parallels-test-harness/README.md b/docs/planning/parallels-test-harness/README.md new file mode 100644 index 00000000..1d919872 --- /dev/null +++ b/docs/planning/parallels-test-harness/README.md @@ -0,0 +1,224 @@ +# Parallels Launcher -> Terminal Test Harness + +Reusable host-side automation that drives the Breenix +**launcher -> terminal** flow on a fresh Parallels VM and verifies it with real +serial-log evidence. The harness is host-side tooling only; it does not modify +any kernel or userspace source. + +## Flow under test + +1. Boot Breenix on a fresh Parallels VM via `./run.sh --parallels`. +2. Wait for the window manager (BWM) to be ready. +3. **Double-tap SUPER** -> the launcher (`/bin/blauncher`) opens with + `APPS[0] = "Terminal"` (which maps to `/bin/bterm`) pre-selected. +4. **Press Enter** -> the terminal (`/bin/bterm`) launches. + (Optionally type `term` first to filter the list — "Terminal" stays index 0 — + then Enter.) + +A run **passes only** when the serial log shows the launcher opened **and** the +terminal actually launched and initialized. "Launcher opened" alone is a FAIL. + +## Proven recipe (encoded in the scripts) + +### Boot + +- Boot exclusively via `./run.sh --parallels [--no-build]`. 
It creates a fresh + epoch-named VM `breenix-<epoch>`, cleans up old `breenix-*` VMs, and **tails + serial forever** — so it must be run in the background (the smoke script does + this with `nohup ... &` and kills it on exit). +- Serial log: `/tmp/breenix-parallels-serial.log`. `run.sh` removes it fresh on + each boot, so any marker found is from the current boot. + +### Readiness + warmup + +- Readiness marker (grep serial): + `[bwm] hotkeys: using built-in defaults for early boot` +- After readiness, allow ~60s VirGL warmup before trusting display capture. + +### Trigger — double-tap SUPER + +Super is PS/2 set-1 **extended** scancode `0xE0 0x5B`: + +| Field | Value | Notes | +|------------------|-----------|----------------------------------------| +| Extended prefix | `224` | `0xE0` | +| Key code | `91` | `0x5B` (left GUI / Super) | +| Hold per tap | ~40 ms | press -> release dwell | +| Inter-tap gap | ~150 ms | must be `< 400 ms` for a "double" tap | + +A **tap** = (optional `0xE0` prefix press) -> press `91` -> hold -> release `91` +-> (release prefix). A **double-tap** = two taps within 400 ms. + +`Enter` = scancode `28`. + +### Injection mechanism + +`prlctl send-key-event <vm> --scancode <code> --event press|release`, wrapped by +the canonical helper `scripts/parallels/inject.sh`: + +```bash +export VM=breenix-<epoch> # set once for the sequence +scripts/parallels/inject.sh doubletap 91 150 224 # double-Super +scripts/parallels/inject.sh type term # filter text +scripts/parallels/inject.sh enter # press Enter +``` + +Commands: `tap <code> [hold_ms]`, `key <code> [hold_ms]`, `doubletap <code> <gap_ms> + [prefix]`, `hold <code> [prefix]`, `type <text>`, `enter`. +The VM name comes from `$VM` (preferred — `export` it once) or the first +positional argument. If `$VM` is empty/unset and no name is passed, `inject.sh` +errors loudly (exit 2) rather than silently no-op'ing. 
+ +### Validation oracles (grep serial, in order) + +| Stage | Serial marker | +|--------------------|-------------------------------------| +| Launcher opened | `[spawn] path='/bin/blauncher'` | +| Terminal launched | `[spawn] path='/bin/bterm'` | +| Terminal init'd | `[bterm] config:` | +| (bonus signal) | `[bterm] spawned child pid=` | + +**PASS requires both** `[spawn] path='/bin/bterm'` **and** `[bterm] config:`. +Honesty rule: never pass on the launcher marker alone — if only the launcher +opened, the run FAILs with that reason. + +## Running a single smoke test + +```bash +scripts/parallels/launcher-smoke.sh [--no-build] [--keep-vm] \ + [--timeout SECS] [--type-filter] +``` + +| Flag | Effect | +|-----------------|-------------------------------------------------------------| +| `--no-build` | Pass `--no-build` through to `run.sh` (reuse artifacts). | +| `--keep-vm` | Don't stop the VM on exit (default: stop with `--kill`). | +| `--timeout SECS`| Overall budget (default 900). | +| `--type-filter` | Type `term` before Enter (default: just Enter). | + +The script: + +- launches `run.sh --parallels` in the background (killed on exit), +- polls serial for the readiness marker, **only trusting it once the serial log + is the fresh one this boot created** (inode differs from any leftover + prior-run log) so a stale marker can't be mistaken for readiness, +- resolves the VM name authoritatively from `run.sh`'s own `VM: breenix-<epoch>` + stdout line (falling back to `prlctl list -a | grep breenix-`), so a leftover + stuck `breenix-*` VM can never be selected by mistake, +- waits VirGL warmup, then injects double-Super and Enter, +- writes an evidence dir at + `logs/parallels-launcher-test/run-<YYYYMMDD-HHMMSS>/` containing the serial + excerpt, display screenshots (via `scripts/parallels/capture-display.sh`), and + `result.txt`, +- prints **exactly one** final line: `RESULT: PASS` (exit 0) or + `RESULT: FAIL: <reason>` (exit 1). 
+ +The injection method is a clearly-marked config block at the top of the script +(`SUPER_PREFIX=224`, `SUPER_CODE=91`, `INTER_TAP_MS=150`, `ENTER_CODE=28`). If +the proven trigger changes, edit those values — nothing else needs to change. + +> The smoke script contains **no sandbox logic**. Callers must run it +> un-sandboxed (a wrapper passes `dangerouslyDisableSandbox`). + +## Running the streak workflow + +`.claude/workflows/parallels-launcher-test.js` runs the smoke test +**sequentially** (single VM — never in parallel) and measures stability: + +```js +Workflow({ name: 'parallels-launcher-test' }) +``` + +- Up to **15 attempts**, one `agent()` per attempt; each agent runs + `launcher-smoke.sh` via the Bash tool with `dangerouslyDisableSandbox: true` + and `run_in_background: true` (a run takes ~8-15 min), polling until it sees a + `RESULT:` line. +- Tracks the consecutive-PASS streak. **Stops early on a 10-in-a-row streak.** + On any FAIL it records the streak + evidence and **continues** (to measure + flakiness) until 15 attempts or the 10-streak is achieved. +- Returns `{ consecutiveGreenAchieved, greenStreakMax, attempts, firstFailure, + evidenceDir }`. + +## Host prerequisites & known limitations + +These were root-caused during the build-out (2026-06-01). Read them before +running, especially for unattended runs. + +### The macOS screen MUST be unlocked + +`prlctl send-key-event` reaches the guest only when the Mac console is +**unlocked**. With the console locked, Parallels detaches the VM window and +**silently drops** every injected keystroke: `send-key-event` returns `rc=0` +but the key never lands in the guest (proven functionally — injecting `=` into +the Bounce demo changed nothing; no hotkey `[spawn]` appeared). + +This is **not** a TCC / Accessibility / Input-Monitoring permissions issue and +there is **no permissions grant that fixes it**. 
Injection goes through the +virtual xHCI HID via `prl_disp_service`, not through macOS CGEvent/`CGPostEvent` +— so TCC is never consulted. A locked console simply has no presented VM +console for the HID stream to attach to. + +There is **no non-interactive unlock bypass**. The smoke script preflights this +and refuses to run on a locked Mac: + +```bash +# One-line lock check (exit 0 = locked, 1 = unlocked): +python3 -c "import Quartz,sys; d=Quartz.CGSessionCopyCurrentDictionary(); sys.exit(0 if (d and d.get('CGSSessionScreenIsLocked')) else 1)" +``` + +On a locked screen the script prints +`RESULT: FAIL: macOS screen is locked — ...` and exits 1 rather than producing +a misleading boot/injection failure. + +### Unattended / overnight runs (testing at scale) + +For runs without a human present: + +1. **Disable auto-lock.** System Settings -> Lock Screen -> + "Require password after screen saver begins/display is turned off" = **Never + / Off**. Otherwise the screen re-locks mid-run and injection silently dies. +2. **Keep the display awake** with `caffeinate -d` for the run's duration. The + smoke script starts `caffeinate -d &` automatically (and kills it on exit), + but disabling auto-lock is still required because `caffeinate` prevents sleep, + not the lock that fires on display-off. + +These two together are the requirement for driving the launcher flow at scale +unattended. + +### QEMU is NOT a viable substitute for this flow + +QEMU was evaluated as a lock-independent alternative (it injects keys via its +own monitor, not macOS events). It does **not** work for this specific flow, for +two independent reasons: + +- **BWM never starts on QEMU.** BWM's ARM64 path requires the **VirGL 3D + compositor**, which is Parallels-specific and absent on the QEMU build here. + With no compositor, BWM does not come up, so there is nothing to drive. 
+- **SUPER is never observed on QEMU.** The double-tap-Super hotkey reads + `SUPER_PRESSED` exclusively from the **USB-HID / xHCI** driver, which never + enumerates on QEMU. QEMU's `virtio-keyboard` MMIO driver never tracks the + Super modifier, so the gesture cannot be recognized even if keys arrive. + +Making QEMU viable would require **kernel changes** (a software-compositor +fallback for BWM, plus a `virtio-keyboard`->SUPER bridge) — explicitly out of +scope for this host-side harness. + +For reference, the working QEMU ARM64 boot recipe is `-M virt,gic-version=3 +-cpu max` (run.sh's `cortex-a72` hangs). `run.sh` exposes a QEMU monitor on +`tcp:127.0.0.1:4444` and a QMP socket at `/tmp/breenix-qmp.sock`, which is how +keys would be injected if the two kernel gaps above were closed. + +### If the injection method changes + +A separate effort may change the injection primitive. If it does (different key, +non-extended encoding, or a new mechanism entirely), update the config block at +the top of `scripts/parallels/launcher-smoke.sh` (`SUPER_PREFIX`, `SUPER_CODE`, +`INTER_TAP_MS`, `ENTER_CODE`) and, if the primitive itself changes, the +`press`/`release`/`tap` logic in `scripts/parallels/inject.sh`. + +## Exit criterion + +The harness is considered green when the workflow reports +**10 consecutive `RESULT: PASS` runs** (`consecutiveGreenAchieved: true`, +`greenStreakMax >= 10`). +``` diff --git a/kernel/src/arch_impl/aarch64/exception.rs b/kernel/src/arch_impl/aarch64/exception.rs index d7b0304f..0bb1c629 100644 --- a/kernel/src/arch_impl/aarch64/exception.rs +++ b/kernel/src/arch_impl/aarch64/exception.rs @@ -1028,6 +1028,113 @@ pub extern "C" fn handle_sync_exception(frame: *mut Aarch64ExceptionFrame, esr: raw_uart_str(" ELR="); raw_uart_hex(frame_ref.elr); raw_uart_str("\n"); + + // Full fatal register dump. 
EC=0xe (Illegal Execution State) means + // an ERET restored an illegal PSTATE — we MUST see SPSR/ESR/FAR plus + // the GP registers to confirm which stale ELR/SPSR was restored. + // This is the fatal park path (interrupts already masked above), so + // a full dump is appropriate; uses the same lock-free raw_uart path + // as the [UNHANDLED_EC] line. + // SP at crash time = frame address + 272 (exception frame size), + // matching the convention used by the other fatal handlers. + let sp_at_crash = frame_ref as *const _ as u64 + 272; + raw_uart_str("[FATAL_REGS] cpu="); + raw_uart_dec(cpu_id as u64); + raw_uart_str(" spsr="); + raw_uart_hex(frame_ref.spsr); + raw_uart_str(" esr="); + raw_uart_hex(esr); + raw_uart_str(" far="); + raw_uart_hex(far); + raw_uart_str(" elr="); + raw_uart_hex(frame_ref.elr); + raw_uart_str(" sp="); + raw_uart_hex(sp_at_crash); + raw_uart_str("\n x0="); + raw_uart_hex(frame_ref.x0); + raw_uart_str(" x1="); + raw_uart_hex(frame_ref.x1); + raw_uart_str(" x2="); + raw_uart_hex(frame_ref.x2); + raw_uart_str(" x3="); + raw_uart_hex(frame_ref.x3); + raw_uart_str("\n x4="); + raw_uart_hex(frame_ref.x4); + raw_uart_str(" x5="); + raw_uart_hex(frame_ref.x5); + raw_uart_str(" x6="); + raw_uart_hex(frame_ref.x6); + raw_uart_str(" x7="); + raw_uart_hex(frame_ref.x7); + raw_uart_str("\n x8="); + raw_uart_hex(frame_ref.x8); + raw_uart_str(" x9="); + raw_uart_hex(frame_ref.x9); + raw_uart_str(" x10="); + raw_uart_hex(frame_ref.x10); + raw_uart_str(" x11="); + raw_uart_hex(frame_ref.x11); + raw_uart_str("\n x12="); + raw_uart_hex(frame_ref.x12); + raw_uart_str(" x13="); + raw_uart_hex(frame_ref.x13); + raw_uart_str(" x14="); + raw_uart_hex(frame_ref.x14); + raw_uart_str(" x15="); + raw_uart_hex(frame_ref.x15); + raw_uart_str("\n x16="); + raw_uart_hex(frame_ref.x16); + raw_uart_str(" x17="); + raw_uart_hex(frame_ref.x17); + raw_uart_str(" x18="); + raw_uart_hex(frame_ref.x18); + raw_uart_str(" x19="); + raw_uart_hex(frame_ref.x19); + raw_uart_str("\n 
x20="); + raw_uart_hex(frame_ref.x20); + raw_uart_str(" x21="); + raw_uart_hex(frame_ref.x21); + raw_uart_str(" x22="); + raw_uart_hex(frame_ref.x22); + raw_uart_str(" x23="); + raw_uart_hex(frame_ref.x23); + raw_uart_str("\n x24="); + raw_uart_hex(frame_ref.x24); + raw_uart_str(" x25="); + raw_uart_hex(frame_ref.x25); + raw_uart_str(" x26="); + raw_uart_hex(frame_ref.x26); + raw_uart_str(" x27="); + raw_uart_hex(frame_ref.x27); + raw_uart_str("\n x28="); + raw_uart_hex(frame_ref.x28); + raw_uart_str(" x29="); + raw_uart_hex(frame_ref.x29); + raw_uart_str(" x30="); + raw_uart_hex(frame_ref.x30); + raw_uart_str("\n"); + + // Optional [FATAL_THREAD]: the currently-dispatched thread's + // saved_by_inline_schedule flag and saved context.elr_el1. Read via + // try_dump_state() (SCHEDULER.try_lock — returns None instead of + // blocking, so it can NEVER deadlock; documented interrupt-safe) and + // is already used by the PC_ALIGN fatal handler above. We only read + // the current thread's entry. + if let Some(tid) = crate::task::scheduler::current_thread_id() { + if let Some(dump) = crate::task::scheduler::try_dump_state() { + if let Some(thread) = dump.threads.iter().find(|t| t.id == tid) { + raw_uart_str("[FATAL_THREAD] tid="); + raw_uart_dec(tid); + raw_uart_str(" saved_by_inline_schedule="); + raw_uart_dec(if thread.saved_by_inline_schedule { 1 } else { 0 }); + raw_uart_str(" ctx_elr_el1="); + raw_uart_hex(thread.elr_el1); + raw_uart_str("\n"); + } + } else { + raw_uart_str("[FATAL_THREAD] scheduler lock busy; thread state skipped\n"); + } + } } dump_fatal_postmortem_once("UNHANDLED_EC"); // Redirect to idle instead of hanging — allows system to recover. 
diff --git a/kernel/src/drivers/usb/hid.rs b/kernel/src/drivers/usb/hid.rs index a3ca7efa..a5af43f2 100644 --- a/kernel/src/drivers/usb/hid.rs +++ b/kernel/src/drivers/usb/hid.rs @@ -37,6 +37,13 @@ static CAPS_LOCK_ACTIVE: AtomicBool = AtomicBool::new(false); /// Super/GUI key state tracking (exposed to userspace via poll_modifier_state) static SUPER_PRESSED: AtomicBool = AtomicBool::new(false); +/// Latched count of Super press-edges (0→1 transitions) since last read. +/// Incremented on the rising edge of SUPER the instant the HID report arrives, +/// regardless of when the compositor polls. Cleared atomically by +/// take_super_tap_count(). This ensures a fast tap whose ~30ms high window +/// falls entirely between two bursty compositor-wait polls is never lost — +/// mirroring the MOUSE_BUTTONS_PRESSED latch pattern. +static SUPER_TAP_COUNT: AtomicU32 = AtomicU32::new(0); /// Alt key state tracking static ALT_PRESSED: AtomicBool = AtomicBool::new(false); @@ -224,7 +231,16 @@ pub fn process_keyboard_report(report: &[u8]) { || (modifiers & 0x10) != 0 || (modifiers & 0x08) != 0 || (modifiers & 0x80) != 0; - SUPER_PRESSED.store(super_now, Ordering::Relaxed); + // Latch every rising edge of SUPER so a tap that completes (press+release) + // entirely between two compositor polls is still counted exactly once. + let was_super = SUPER_PRESSED.swap(super_now, Ordering::Relaxed); + if super_now && !was_super { + SUPER_TAP_COUNT.fetch_add(1, Ordering::Relaxed); + // Wake the compositor (same proven lock-free path the mouse latch uses) + // so a Super tap triggers the hotkey check with low latency even when no + // window is dirty and the mouse is idle. + crate::syscall::graphics::wake_compositor_if_waiting(); + } // Track Alt key state (bits 2/6) let alt = (modifiers & 0x04) != 0 || (modifiers & 0x40) != 0; @@ -476,6 +492,25 @@ pub fn poll_modifier_state() -> u32 { state } +/// Consume the latched count of Super press-edges since the last call. 
+/// +/// Returns the number of 0→1 SUPER transitions captured at HID-report time and +/// resets the latch to 0. Used by BWM's double-tap detection so taps that arrive +/// between compositor-wait polls are not dropped. This complements (does not +/// replace) the level-based poll_modifier_state used for modifier+key combos. +pub fn take_super_tap_count() -> u32 { + SUPER_TAP_COUNT.swap(0, Ordering::Relaxed) +} + +/// Check for pending latched Super press-edges (non-consuming peek). +/// +/// Used by compositor_ready_bits so a Super tap that completed between polls +/// makes compositor_wait return (rather than re-blocking) and BWM gets a chance +/// to drain the tap count. Mirrors has_pending_press() for the mouse latch. +pub fn has_pending_super_tap() -> bool { + SUPER_TAP_COUNT.load(Ordering::Relaxed) != 0 +} + /// Get current mouse position in screen coordinates. pub fn mouse_position() -> (u32, u32) { ( diff --git a/kernel/src/syscall/graphics.rs b/kernel/src/syscall/graphics.rs index 3c6fd0ce..8e9d306b 100644 --- a/kernel/src/syscall/graphics.rs +++ b/kernel/src/syscall/graphics.rs @@ -240,6 +240,12 @@ fn compositor_ready_bits(last_registry_gen: u64, prev_mouse: u64) -> (u64, u64, if cur_reg_gen != last_registry_gen { ready |= 4; } + // Keyboard readiness: a latched Super press-edge (captured at HID-report + // time) means a hotkey tap may have completed between polls. Surface it so + // compositor_wait returns and BWM drains the latch instead of re-blocking. + if crate::drivers::usb::hid::has_pending_super_tap() { + ready |= 8; + } (ready, cur_reg_gen, mouse_packed) } @@ -1329,6 +1335,14 @@ fn handle_virgl_op(cmd: &FbDrawCmd) -> SyscallResult { // F32c waitqueue stress stats. handle_wait_stress_stats(cmd) } + 31 => { + // TakeSuperTapCount: read-and-clear the latched count of Super + // press-edges captured at HID-report time. 
Lets BWM recover taps + // that completed (press+release) between two compositor polls so + // a correctly-delivered double-tap-Super is never dropped. + let count = crate::drivers::usb::hid::take_super_tap_count(); + SyscallResult::Ok(count as u64) + } _ => { crate::serial_println!("[virgl-op] UNKNOWN op={}", cmd.op); SyscallResult::Err(super::ErrorCode::InvalidArgument as u64) diff --git a/scripts/parallels/inject.sh b/scripts/parallels/inject.sh new file mode 100755 index 00000000..e57936de --- /dev/null +++ b/scripts/parallels/inject.sh @@ -0,0 +1,138 @@ +#!/usr/bin/env bash +# +# inject.sh — canonical Parallels key-injection helper for Breenix host-side tests. +# +# All scancodes are PS/2 set-1 codes; Parallels translates them to USB-HID and +# delivers them to the guest. Extended keys (cursor keys, GUI/Super, etc.) use a +# 0xE0 (224) prefix byte that is sent as its own press/release around the code. +# +# Each command is delivered as ONE `prlctl send-key-event -j` batch (events read +# from stdin), so inter-event delays are applied precisely by the Parallels +# dispatcher — essential for the timing-sensitive double-tap on a loaded host, +# where 4 separate prlctl spawns would otherwise blow bwm's 400ms window. +# +# The VM name is read from $VM (env) or, if unset, the first positional arg +# *only* for the rare case where a caller wants `inject.sh tap ...`. The +# normal form is `VM=breenix-123 inject.sh ...`. +# +# Commands: +# tap [hold_ms] single press+release of a basic key +# key [hold_ms] alias for tap +# doubletap [prefix] +# two clean taps separated by gap_ms; if a +# prefix is given (e.g. 
224 for 0xE0) each tap +# is wrapped with that extended prefix +# hold [prefix] press, wait hold_ms, release (extended-aware) +# type type a lowercase-ascii string (a-z, space, +# digits 0-9) +# enter tap Enter (scancode 28) +# +# Examples: +# VM=breenix-123 scripts/parallels/inject.sh doubletap 91 150 224 # double-Super +# VM=breenix-123 scripts/parallels/inject.sh type term +# VM=breenix-123 scripts/parallels/inject.sh enter +# +# Default timings (override per-call via the hold_ms / gap_ms args): +# HOLD_MS key press-to-release dwell (default 40) +# PREFIX_MS gap around an extended prefix (default 5) +# TYPE_GAP_MS inter-character gap for `type` (default 40) +# +set -euo pipefail + +# ---- defaults (tunable via env) -------------------------------------------- +HOLD_MS="${HOLD_MS:-40}" +PREFIX_MS="${PREFIX_MS:-5}" +TYPE_GAP_MS="${TYPE_GAP_MS:-40}" + +# ---- VM resolution ---------------------------------------------------------- +# Prefer $VM. If $VM is unset/empty, allow the legacy `inject.sh ...` +# form by peeking at $1 only when it does not look like a known command. +if [[ -z "${VM:-}" ]]; then + case "${1:-}" in + tap|key|doubletap|hold|type|enter) : ;; # $1 is a command, VM truly missing + "" ) : ;; + * ) + VM="$1" + shift + ;; + esac +fi +if [[ -z "${VM:-}" ]]; then + echo "inject.sh: error: VM name is empty/unset." >&2 + echo "inject.sh: set it with 'export VM=breenix-' (preferred) or pass the VM name as the first argument." >&2 + exit 2 +fi + +# ---- low-level primitives (single batched -j call) -------------------------- +# Every command's key events are sent as ONE `prlctl send-key-event -j` batch +# read from stdin. This is the critical design point: a double-tap is 4 events +# that must land inside bwm's 400ms window, and 4 SEPARATE prlctl spawns take +# ~1.9s on a loaded host (window blown). 
As one batch, the inter-event DELAYS are +# applied by the Parallels dispatcher with precise timing, independent of host +# load — so the double-tap always lands in-window regardless of prlctl's +# process-spawn latency. + +# Send a JSON event array (built by the helpers below) as one -j batch via stdin. +send_json() { printf '%s' "$1" | prlctl send-key-event "$VM" -j >/dev/null 2>&1; } + +# Emit the JSON event objects for one (possibly extended) tap: press, hold, release. +# $1 code, $2 hold_ms, $3 extended-prefix (optional, e.g. 224 for 0xE0) +tap_events() { + local code="$1" hold="$2" ext="${3:-}" pre="" post="" + if [[ -n "$ext" ]]; then + pre="{\"scancode\":$ext,\"event\":\"press\"},{\"delay\":$PREFIX_MS}," + post=",{\"delay\":$PREFIX_MS},{\"scancode\":$ext,\"event\":\"release\"}" + fi + printf '%s{"scancode":%s,"event":"press"},{"delay":%s},{"scancode":%s,"event":"release"}%s' \ + "$pre" "$code" "$hold" "$code" "$post" +} + +# Single tap. $1 code, $2 hold_ms (optional), $3 ext-prefix (optional) +tap() { send_json "[$(tap_events "$1" "${2:-$HOLD_MS}" "${3:-}")]"; } + +# Two clean taps separated by gap_ms, sent atomically in ONE batch (the dispatcher +# spaces them by gap_ms). $1 code, $2 gap_ms, $3 ext-prefix (optional) +doubletap() { + local code="$1" gap="${2:-150}" ext="${3:-}" + send_json "[$(tap_events "$code" "$HOLD_MS" "$ext"),{\"delay\":$gap},$(tap_events "$code" "$HOLD_MS" "$ext")]" +} + +# Press, hold for hold_ms, release. $1 code, $2 hold_ms, $3 ext-prefix (optional) +hold() { send_json "[$(tap_events "$1" "${2:-100}" "${3:-}")]"; } + +# PS/2 set-1 scancodes for printable characters we support in `type`. 
+# Print the PS/2 set-1 scancode for one supported character (a-z, 0-9, space);
+# print nothing for any other character.
+#
+# This is a `case` lookup rather than a `declare -A` table on purpose:
+# associative arrays need bash >= 4, and on bash 3.2 (the stock macOS
+# /bin/bash, which `#!/usr/bin/env bash` may resolve to) a top-level
+# `declare -A` fails and, under `set -e`, aborts the script before ANY command
+# (tap/doubletap/enter/...) is dispatched — not just `type`.
+#   $1  a single character
+scancode_for() {
+    case "$1" in
+        a) echo 30 ;; b) echo 48 ;; c) echo 46 ;; d) echo 32 ;; e) echo 18 ;;
+        f) echo 33 ;; g) echo 34 ;; h) echo 35 ;; i) echo 23 ;; j) echo 36 ;;
+        k) echo 37 ;; l) echo 38 ;; m) echo 50 ;; n) echo 49 ;; o) echo 24 ;;
+        p) echo 25 ;; q) echo 16 ;; r) echo 19 ;; s) echo 31 ;; t) echo 20 ;;
+        u) echo 22 ;; v) echo 47 ;; w) echo 17 ;; x) echo 45 ;; y) echo 21 ;;
+        z) echo 44 ;;
+        1) echo 2 ;; 2) echo 3 ;; 3) echo 4 ;; 4) echo 5 ;; 5) echo 6 ;;
+        6) echo 7 ;; 7) echo 8 ;; 8) echo 9 ;; 9) echo 10 ;; 0) echo 11 ;;
+        ' ') echo 57 ;;
+        *) : ;;   # unsupported: emit nothing, caller warns and skips
+    esac
+}
+
+# Type a string as ONE -j batch: press+release each char, spaced by TYPE_GAP_MS.
+# Unsupported characters are skipped with a warning on stderr; if nothing
+# remains to send, no batch is issued and the function succeeds.
+#   $1  the string to type
+type_str() {
+    local s="$1" i ch code parts=""
+    for (( i=0; i<${#s}; i++ )); do
+        ch="${s:$i:1}"
+        code="$(scancode_for "$ch")"
+        if [[ -n "$code" ]]; then
+            [[ -n "$parts" ]] && parts+=","
+            parts+="$(tap_events "$code" "$HOLD_MS"),{\"delay\":$TYPE_GAP_MS}"
+        else
+            echo "inject.sh: skipping unsupported character '$ch'" >&2
+        fi
+    done
+    [[ -z "$parts" ]] && return 0
+    send_json "[$parts]"
+}
+
+# ---- dispatch ---------------------------------------------------------------
+cmd="${1:?command required (tap|key|doubletap|hold|type|enter)}"; shift || true
+case "$cmd" in
+    tap|key)   tap "$@" ;;
+    doubletap) doubletap "$@" ;;
+    hold)      hold "$@" ;;
+    enter)     tap 28 ;;
+    type)      type_str "$@" ;;
+    *) echo "inject.sh: unknown command: $cmd" >&2; exit 2 ;;
+esac
diff --git a/scripts/parallels/launcher-smoke.sh b/scripts/parallels/launcher-smoke.sh
new file mode 100755
index 00000000..64717bad
--- /dev/null
+++ b/scripts/parallels/launcher-smoke.sh
@@ -0,0 +1,472 @@
+#!/usr/bin/env bash
+#
+# launcher-smoke.sh — ONE full launcher->terminal smoke run on a fresh Parallels VM.
+#
+# Flow under test:
+#   boot (run.sh --parallels) -> BWM ready -> double-tap SUPER opens the launcher
+#   (/bin/blauncher, pre-selecting APPS[0] = "Terminal") -> Enter launches the
+#   terminal (/bin/bterm). PASS requires REAL serial evidence that bterm started
+#   (its own '[bterm] config:' line) AND became functional (spawned its child
+#   shell, '[bterm] spawned child pid=') — never "launcher opened" alone.
+# NB: blauncher launches bterm via fork+execv, which does NOT emit the kernel's +# "[spawn] path='...'" line — so we validate bterm's OWN startup logs, which are +# stronger proof (the binary actually ran and initialized) than a spawn record. +# +# Usage: +# scripts/parallels/launcher-smoke.sh [--no-build] [--keep-vm] +# [--timeout SECS] [--type-filter] +# +# Final stdout line is EXACTLY one of: +# RESULT: PASS (exit 0) +# RESULT: FAIL: (exit 1) +# +# Callers must run this un-sandboxed (a wrapper passes dangerouslyDisableSandbox); +# this script contains no sandbox logic. +# +set -euo pipefail + +# ============================================================================= +# INJECTION METHOD CONFIG — tune the trigger in ONE place. +# +# The launcher opens on a double-tap of the SUPER modifier. Breenix's USB-HID +# layer (kernel/src/drivers/usb/hid.rs) maps the Left-CTRL bit to SUPER, so +# injecting a plain Left-Ctrl tap registers as Super in the guest — this is +# literally why the operator calls it the "double control key", and it is the +# exact key Parallels delivers. +# +# We deliberately do NOT use the 0xE0 0x5B (left-GUI) extended scancode: Parallels +# Desktop 26.3.3 rejects a bare `--scancode 91` ("Invalid scan code sequence: 5B") +# and offers no way to send the extended pair as separate --scancode calls. Plain +# (non-extended) scancodes like Left-Ctrl (29) are accepted and map to SUPER. +# +# A "tap" = press/release of the code. A "double-tap" = two taps within 400 ms +# (INTER_TAP_MS gap + ~40 ms hold). To change the trigger, edit THESE values. 
+# ============================================================================= +SUPER_PREFIX= # none — Left-Ctrl is a basic, non-extended scancode +SUPER_CODE=29 # 0x1D Left-Ctrl; Breenix maps the Ctrl HID bit to SUPER +INTER_TAP_MS=150 # gap between the two taps (must be < 400 ms) +ENTER_CODE=28 # Enter / Return + +# ============================================================================= +# Other tunables +# ============================================================================= +# Interleave-robust readiness: concurrent serial writers (telnetd, etc.) can split +# a one-shot marker mid-line, so match EITHER the hotkeys-defaults line OR the +# recurring [bwm-fps] compositing line (printed ~180x/s once the desktop is live, +# so a clean, un-interleaved instance appears within milliseconds). Used with grep -aE. +READY_MARKER='bwm-fps|hotkeys: using built-in defaults' +LAUNCHER_MARKER="[spawn] path='/bin/blauncher'" +BTERM_CONFIG_MARKER='[bterm] config:' # bterm started + read its config +BTERM_SHELL_MARKER='[bterm] spawned child pid=' # bterm launched its child shell +WARMUP_SECS=60 # VirGL warmup after readiness marker +POST_SUPER_WAIT=1.5 # settle after double-Super before grepping for launcher +POST_ENTER_WAIT=3 # settle after Enter before grepping for bterm +FILTER_TEXT='term' # typed when --type-filter is set (Terminal stays index 0) + +# ============================================================================= +# Argument parsing +# ============================================================================= +NO_BUILD=0 +KEEP_VM=0 +OVERALL_TIMEOUT=1200 +TYPE_FILTER=0 +NO_BACKGROUND=0 + +while [[ $# -gt 0 ]]; do + case "$1" in + --no-build) NO_BUILD=1 ;; + --keep-vm) KEEP_VM=1 ;; + --type-filter) TYPE_FILTER=1 ;; + --no-background) NO_BACKGROUND=1 ;; + --timeout) OVERALL_TIMEOUT="${2:?--timeout needs a value}"; shift ;; + -h|--help) + grep '^#' "$0" | sed 's/^# \{0,1\}//' + exit 0 + ;; + *) echo "launcher-smoke.sh: unknown flag: $1" >&2; exit 
2 ;;
+    esac
+    shift
+done
+
+# =============================================================================
+# Paths
+# =============================================================================
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+BREENIX_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+SERIAL_LOG="/tmp/breenix-parallels-serial.log"
+INJECT="$SCRIPT_DIR/inject.sh"
+CAPTURE="$SCRIPT_DIR/capture-display.sh"
+RUN_SH="$BREENIX_ROOT/run.sh"
+
+RUN_TS="$(date +%Y%m%d-%H%M%S)"
+EVIDENCE_DIR="$BREENIX_ROOT/logs/parallels-launcher-test/run-$RUN_TS"
+mkdir -p "$EVIDENCE_DIR"
+RESULT_FILE="$EVIDENCE_DIR/result.txt"
+SERIAL_EXCERPT="$EVIDENCE_DIR/serial-excerpt.txt"
+RUN_LOG="$EVIDENCE_DIR/run-sh.log"
+
+START_EPOCH="$(date +%s)"
+
+# State carried into cleanup / final report.
+RUN_PID=""
+VM_NAME=""
+FINAL_REASON=""
+CAFFEINATE_PID=""
+VM_PROC_PID=""
+# Inode of any pre-existing (stale, prior-run) serial log, captured before we
+# launch run.sh. run.sh `rm -f`s the log and recreates it fresh on boot, which
+# changes the inode; we refuse to trust any marker until the inode differs (or
+# the file is gone), so a leftover prior-run marker can never be mis-read as
+# readiness for THIS boot.
+STALE_SERIAL_INODE=""
+
+# Timestamped progress line on stderr (stdout is reserved for the RESULT line).
+log() { printf '[smoke %s] %s\n' "$(date +%H:%M:%S)" "$*" >&2; }
+
+# =============================================================================
+# Cleanup trap — always kill the backgrounded run.sh; stop the VM unless --keep-vm.
+# Runs on every exit path and preserves the script's exit status.
+# =============================================================================
+cleanup() {
+    local rc=$? kids=""
+    if [[ -n "$RUN_PID" ]] && kill -0 "$RUN_PID" 2>/dev/null; then
+        log "cleanup: killing run.sh pid $RUN_PID"
+        # run.sh spawns children (tail -f). Snapshot their PIDs BEFORE signalling
+        # the parent: once run.sh dies its children are reparented and no longer
+        # match a by-parent lookup, so a later `pkill -P` would miss them and
+        # leak a `tail -f` per run.
+        kids="$(pgrep -P "$RUN_PID" 2>/dev/null || true)"
+        kill "$RUN_PID" 2>/dev/null || true
+        # Best-effort: direct children only. $kids is deliberately unquoted so
+        # each PID becomes its own argument.
+        # shellcheck disable=SC2086
+        [[ -n "$kids" ]] && kill $kids 2>/dev/null || true
+    fi
+    if [[ -n "$CAFFEINATE_PID" ]] && kill -0 "$CAFFEINATE_PID" 2>/dev/null; then
+        log "cleanup: killing caffeinate pid $CAFFEINATE_PID"
+        kill "$CAFFEINATE_PID" 2>/dev/null || true
+    fi
+    if [[ "$KEEP_VM" -eq 0 && -n "$VM_NAME" ]]; then
+        log "cleanup: stopping VM $VM_NAME"
+        prlctl stop "$VM_NAME" --kill >/dev/null 2>&1 || true
+    elif [[ -n "$VM_NAME" ]]; then
+        log "cleanup: --keep-vm set, leaving $VM_NAME running"
+    fi
+    return "$rc"
+}
+trap cleanup EXIT
+
+# Emit the single canonical RESULT line and exit. Also persists result.txt.
+finish_pass() {
+    {
+        echo "RESULT: PASS"
+        echo "vm=$VM_NAME"
+        echo "type_filter=$TYPE_FILTER"
+        echo "evidence_dir=$EVIDENCE_DIR"
+        echo "elapsed_s=$(( $(date +%s) - START_EPOCH ))"
+    } > "$RESULT_FILE"
+    echo "RESULT: PASS"
+    exit 0
+}
+# Same as finish_pass, for a failure. $1 = human-readable reason.
+finish_fail() {
+    FINAL_REASON="$1"
+    {
+        echo "RESULT: FAIL: $FINAL_REASON"
+        echo "vm=$VM_NAME"
+        echo "type_filter=$TYPE_FILTER"
+        echo "evidence_dir=$EVIDENCE_DIR"
+        echo "elapsed_s=$(( $(date +%s) - START_EPOCH ))"
+    } > "$RESULT_FILE"
+    echo "RESULT: FAIL: $FINAL_REASON"
+    exit 1
+}
+
+# Print the seconds left in the overall budget (may be zero or negative).
+remaining_budget() {
+    local now elapsed
+    now="$(date +%s)"
+    elapsed=$(( now - START_EPOCH ))
+    echo $(( OVERALL_TIMEOUT - elapsed ))
+}
+
+# Capture a screenshot into the evidence dir (best-effort; never fatal).
+capture_evidence() {
+    local label="$1"
+    if [[ -x "$CAPTURE" && -n "$VM_NAME" ]]; then
+        log "capturing display ($label)"
+        BREENIX_CAPTURE_RETRY_SCHEDULE="5 15 30" \
+            "$CAPTURE" "$VM_NAME" "$EVIDENCE_DIR/display-$label.png" \
+            >/dev/null 2>>"$EVIDENCE_DIR/capture.log" || \
+            log "capture ($label) failed (non-fatal); see capture.log"
+    fi
+}
+
+# CPU-relief strategy (the operator uses this Mac during runs): keep the VM at
+# LOW priority (renice 20) through the long boot/warmup/idle phases so it yields
+# CPU to the operator's foreground apps under contention — but RESTORE it to
+# normal priority for the brief, timing-sensitive double-tap injection window.
+#
+# We use renice ONLY (no `taskpolicy -b`): banishing the VM to efficiency cores
+# starved the guest so hard it could not consume the two taps inside bwm's 400ms
+# double-tap window (observed 1876ms => launcher never opened). renice keeps the
+# VM on the performance cores at low priority (polite under contention).
+#
+# NOTE(review): raising the nice value needs no privileges, but LOWERING it
+# again (20 -> 0) is normally refused for non-root users (see renice(8)).
+# Both helpers therefore check renice's exit status and log what actually
+# happened instead of assuming success; neither is ever fatal.
+#
+# Returns 1 only when no VM process exists yet (caller retries), else 0.
+background_vm_proc() {
+    [[ "$NO_BACKGROUND" -eq 1 ]] && return 0
+    local pid
+    pid="$(pgrep -f 'prl_vm_app.*--vm-name breenix-' 2>/dev/null | head -1 || true)"
+    [[ -z "$pid" ]] && return 1
+    VM_PROC_PID="$pid"
+    if renice 20 -p "$pid" >/dev/null 2>&1; then
+        log "lowered Breenix VM pid=$pid to nice 20 — yields CPU to your foreground apps under contention (stays on perf cores so injection stays responsive)"
+    else
+        log "WARNING: renice 20 failed for Breenix VM pid=$pid; VM keeps its current priority"
+    fi
+    return 0
+}
+
+# Restore the VM to normal priority for the timing-sensitive injection window.
+# Best-effort: a refused renice is reported, not fatal.
+foreground_vm_proc() {
+    [[ "$NO_BACKGROUND" -eq 1 ]] && return 0
+    [[ -z "$VM_PROC_PID" ]] && return 0
+    if renice 0 -p "$VM_PROC_PID" >/dev/null 2>&1; then
+        log "restored Breenix VM pid=$VM_PROC_PID to nice 0 for the double-tap injection window"
+    else
+        log "WARNING: could not restore Breenix VM pid=$VM_PROC_PID to nice 0 (lowering a nice value normally requires root); injection proceeds at the lowered priority — use --no-background if taps are missed"
+    fi
+}
+
+# Convert integer milliseconds to seconds with three decimals (for `sleep`).
+ms_to_s() { awk "BEGIN{printf \"%.3f\", ${1}/1000}"; }
+
+# Current inode of the serial log, or empty if it does not exist.
+serial_inode() { [[ -e "$SERIAL_LOG" ]] && stat -f '%i' "$SERIAL_LOG" 2>/dev/null || true; }
+
+# True only once the serial log is the FRESH one run.sh created for this boot:
+# the log must exist, and either no stale log was present when we snapshotted
+# or its inode has changed since. A missing log is NOT fresh (not recreated yet).
+serial_is_fresh() {
+    local cur
+    cur="$(serial_inode)"
+    [[ -z "$cur" ]] && return 1                  # not (re)created yet
+    [[ -z "$STALE_SERIAL_INODE" ]] && return 0   # no stale file existed at all
+    [[ "$cur" != "$STALE_SERIAL_INODE" ]]
+}
+
+# =============================================================================
+# Preflight
+# =============================================================================
+[[ -x "$INJECT" ]] || finish_fail "missing/non-executable inject helper at $INJECT"
+[[ -x "$RUN_SH" ]] || finish_fail "missing/non-executable run.sh at $RUN_SH"
+command -v prlctl >/dev/null 2>&1 || finish_fail "prlctl not found on PATH"
+
+# =============================================================================
+# Locked-screen preflight + caffeinate keep-alive.
+#
+# Hard requirement: macOS must NOT be locked. When the console is locked,
+# Parallels detaches the VM window and silently drops every injected
+# keystroke (send-key-event returns rc=0 but the key never reaches the guest).
+# This is NOT a TCC/permissions issue — injection goes through the virtual
+# xHCI HID via prl_disp_service, not macOS CGEvent — so there is no
+# non-interactive bypass. We therefore refuse to run on a locked Mac.
+#
+# The lock check must never crash the run on its own (missing python/Quartz,
+# headless CI, etc.): if the check itself errors, we warn and proceed.
+# =============================================================================
+LOCK_CHECK_RC=2
+if command -v python3 >/dev/null 2>&1; then
+    # Run the probe as an if-condition: it exits 1 when UNLOCKED (the normal,
+    # required state), and a bare non-zero command would trip `set -e` before we
+    # could read $?. As a condition, `set -e` is exempt and the else-branch sees
+    # the real exit code. 0 = LOCKED, 1 = UNLOCKED, 3 (or other) = probe errored.
+    #
+    # The probe traps its own failures and exits 3: an uncaught Python exception
+    # (e.g. the Quartz module is not installed) would otherwise exit 1 and be
+    # misreported as "unlocked".
+    if python3 -c "
+import sys
+try:
+    import Quartz
+    d = Quartz.CGSessionCopyCurrentDictionary()
+    locked = bool(d and d.get('CGSSessionScreenIsLocked'))
+except Exception:
+    sys.exit(3)
+sys.exit(0 if locked else 1)
+" >/dev/null 2>&1; then
+        LOCK_CHECK_RC=0
+    else
+        LOCK_CHECK_RC=$?
+    fi
+else
+    log "WARNING: python3 not found; skipping macOS lock check (proceeding)"
+fi
+
+case "$LOCK_CHECK_RC" in
+    0)
+        # finish_fail prints the same canonical RESULT line and exits 1, and
+        # additionally persists result.txt like every other failure path.
+        finish_fail "macOS screen is locked — Parallels drops injected keyboard input with no presented console. Unlock the Mac at the console, run 'caffeinate -d &', then retry."
+        ;;
+    1)
+        log "lock check: macOS screen is unlocked"
+        ;;
+    *)
+        log "WARNING: lock check failed to run (no Quartz / errored); proceeding without it"
+        ;;
+esac
+
+# Serial-only guard: these runs MUST be serial. run.sh kills any existing breenix
+# VM before creating its own, so an overlapping run would destroy an in-flight VM
+# (and two VMs would fight the dispatcher). Refuse to start if one is already up.
+EXISTING_VM="$(prlctl list 2>/dev/null | awk '/breenix-/{print $NF}' | head -1 || true)"
+if [[ -n "$EXISTING_VM" ]]; then
+    finish_fail "a Breenix VM ($EXISTING_VM) is already running — launcher-smoke runs must be SERIAL (one VM at a time). Stop it (prlctl stop $EXISTING_VM --kill) and retry."
+fi
+
+# Keep the display awake for the duration of the (long) run so the screen
+# never auto-locks/sleeps mid-injection. Best-effort: a missing caffeinate
+# must not abort the run. Killed in cleanup.
+if command -v caffeinate >/dev/null 2>&1; then
+    caffeinate -d &
+    CAFFEINATE_PID=$!
+    log "started caffeinate -d (pid $CAFFEINATE_PID) to keep the display awake"
+else
+    log "WARNING: caffeinate not found; display may sleep/lock during a long run"
+fi
+
+# =============================================================================
+# (a) Launch run.sh --parallels in the BACKGROUND.
run.sh tails serial forever, +# so it must be backgrounded; we kill it in cleanup. +# ============================================================================= +# Snapshot the inode of any leftover serial log from a previous run BEFORE we +# launch run.sh, so the readiness poll can tell "fresh log from this boot" apart +# from "stale log that already contains a prior run's readiness marker". +STALE_SERIAL_INODE="$(serial_inode)" +if [[ -n "$STALE_SERIAL_INODE" ]]; then + log "stale serial log present (inode $STALE_SERIAL_INODE); will wait for run.sh to recreate it" +fi + +RUN_ARGS=(--parallels) +[[ "$NO_BUILD" -eq 1 ]] && RUN_ARGS+=(--no-build) +log "launching: $RUN_SH ${RUN_ARGS[*]} (background)" +nohup "$RUN_SH" "${RUN_ARGS[@]}" >"$RUN_LOG" 2>&1 & +RUN_PID=$! +log "run.sh pid=$RUN_PID, log=$RUN_LOG" + +# ============================================================================= +# (b) Poll the serial log for the readiness marker, bounded by the overall timeout. +# run.sh removes the serial log fresh on boot, so any match is from THIS boot. +# ============================================================================= +log "waiting for readiness marker: $READY_MARKER" +READY=0 +BG_DONE=0 +while :; do + if [[ "$(remaining_budget)" -le "$WARMUP_SECS" ]]; then + log "timed out waiting for readiness marker" + break + fi + if ! kill -0 "$RUN_PID" 2>/dev/null; then + finish_fail "run.sh exited before readiness (see $RUN_LOG)" + fi + # As soon as the VM process exists, drop it to background priority so it does + # not fight the operator's foreground apps for CPU (injection stays foreground). + if [[ "$BG_DONE" -eq 0 ]] && background_vm_proc; then BG_DONE=1; fi + # Only trust the marker once the serial log is the fresh one run.sh created + # for THIS boot — never a leftover prior-run log that may already contain it. 
+ if serial_is_fresh && grep -qaE -- "$READY_MARKER" "$SERIAL_LOG"; then + READY=1 + break + fi + sleep 3 +done +[[ "$READY" -eq 1 ]] || finish_fail "readiness marker not seen within timeout ($READY_MARKER)" +log "readiness marker seen" + +# ============================================================================= +# (c) Resolve the VM name (breenix-) created by THIS run.sh. +# +# Authoritative source: run.sh prints `VM: breenix-` to its stdout +# (captured in RUN_LOG) AFTER it has created and started that exact VM. Reading +# it from RUN_LOG is immune to leftover/stuck breenix-* VMs that run.sh failed +# to delete. Fall back to the prlctl-list heuristic only if RUN_LOG has no such +# line (e.g. run.sh output format changed). +# ============================================================================= +VM_NAME="$(grep -oE 'breenix-[0-9]+' "$RUN_LOG" 2>/dev/null | tail -1 || true)" +if [[ -n "$VM_NAME" ]]; then + log "resolved VM from run.sh output: $VM_NAME" +else + VM_NAME="$(prlctl list -a 2>/dev/null | grep -o 'breenix-[0-9]\+' | tail -1 || true)" + [[ -n "$VM_NAME" ]] || finish_fail "could not resolve a breenix-* VM (no name in $RUN_LOG, none via prlctl list -a)" + log "resolved VM via prlctl fallback: $VM_NAME" +fi +export VM="$VM_NAME" + +# ============================================================================= +# (d) VirGL warmup. +# ============================================================================= +log "VirGL warmup: sleeping ${WARMUP_SECS}s" +sleep "$WARMUP_SECS" +capture_evidence "pre-trigger" + +# ============================================================================= +# (e) Record the serial line count, inject double-Super, then look for the +# launcher marker in the tail since that line. 
+# ============================================================================= +serial_lines() { [[ -f "$SERIAL_LOG" ]] && wc -l <"$SERIAL_LOG" | tr -d ' ' || echo 0; } + +# Restore full VM priority for the timing-sensitive injection + launch window +# (it ran low-priority through the long boot/warmup for CPU relief). +foreground_vm_proc +BASE_LINE="$(serial_lines)" +log "serial line baseline: $BASE_LINE" + +log "injecting double-Super (prefix=$SUPER_PREFIX code=$SUPER_CODE gap=${INTER_TAP_MS}ms)" +INJ_T0="$(python3 -c 'import time;print(int(time.time()*1000))' 2>/dev/null || echo 0)" +"$INJECT" doubletap "$SUPER_CODE" "$INTER_TAP_MS" "$SUPER_PREFIX" \ + || finish_fail "inject doubletap failed (key injection error — see 'Host prerequisites & known limitations' in README)" +INJ_T1="$(python3 -c 'import time;print(int(time.time()*1000))' 2>/dev/null || echo 0)" +INJ_MS=$(( INJ_T1 - INJ_T0 )) +# The double-tap is sent as a SINGLE `prlctl send-key-event -j` batch, so the +# inter-tap spacing (INTER_TAP_MS) is applied by the dispatcher precisely and is +# INDEPENDENT of this wall-time. INJ_MS is just prlctl's one-call overhead — it +# can be large under host load WITHOUT affecting whether the taps land in bwm's +# 400ms window. (Pre-batching, 4 separate prlctl spawns made INJ_MS == the tap +# spacing and blew the window on a loaded host; batching fixed that.) +log "double-tap injected as one -j batch; prlctl wall-time ${INJ_MS}ms (inter-tap spacing dispatcher-controlled at ${INTER_TAP_MS}ms, load-independent)" + +sleep "$(ms_to_s "$(awk "BEGIN{printf \"%d\", $POST_SUPER_WAIT*1000}")")" + +# Grep only the lines appended since BASE_LINE. 
+tail_since() { [[ -f "$SERIAL_LOG" ]] && tail -n +"$(( BASE_LINE + 1 ))" "$SERIAL_LOG" || true; }
+
+# Serial patterns that indicate a real kernel crash (extended regex).
+KERNEL_FAULT_RE='\[UNHANDLED_EC\]|\[FATAL_POSTMORTEM\]|kernel panic'
+
+# True if the serial tail since BASE_LINE contains the pattern.
+#   $1  grep matcher flag: -F (fixed string) or -E (extended regex)
+#   $2  pattern
+# Deliberately NOT `tail_since | grep -q`: with `set -o pipefail`, grep -q
+# exits on the first match while tail may still be writing a large excerpt;
+# tail then dies of SIGPIPE (141) and the pipeline reports failure, turning a
+# genuine match into a false negative. `grep -c` consumes all of its input, so
+# tail always finishes. `-a` keeps stray binary bytes on the serial line from
+# switching grep into binary-file mode.
+tail_has() {
+    local hits
+    hits="$(tail_since | grep -ac "$1" -- "$2" || true)"
+    [[ "${hits:-0}" -gt 0 ]]
+}
+
+# First serial line (since BASE_LINE) matching KERNEL_FAULT_RE, or empty.
+first_fault_line() { tail_since | grep -aE -- "$KERNEL_FAULT_RE" | head -1 || true; }
+
+if tail_has -F "$LAUNCHER_MARKER"; then
+    log "launcher opened (saw $LAUNCHER_MARKER)"
+else
+    capture_evidence "no-launcher"
+    tail_since > "$SERIAL_EXCERPT" || true
+    # Distinguish a real kernel crash from a dropped double-tap (honest reporting).
+    if tail_has -E "$KERNEL_FAULT_RE"; then
+        finish_fail "KERNEL FAULT before launcher opened: $(first_fault_line) — real Breenix crash, NOT a harness/injection issue"
+    fi
+    finish_fail "launcher did not open after double-Super (no '$LAUNCHER_MARKER') — double-tap not registered by bwm (likely BWM/HID input intermittency; injection was batched + dispatcher-timed)"
+fi
+
+# =============================================================================
+# (f) Optionally type the filter, then Enter; look for the bterm oracles.
+#     Terminal is APPS[0] so it stays selected whether or not we filter.
+# =============================================================================
+if [[ "$TYPE_FILTER" -eq 1 ]]; then
+    log "typing filter text '$FILTER_TEXT'"
+    "$INJECT" type "$FILTER_TEXT" \
+        || finish_fail "inject type '$FILTER_TEXT' failed (key injection error)"
+    sleep 0.5
+fi
+
+log "pressing Enter (code=$ENTER_CODE)"
+"$INJECT" key "$ENTER_CODE" \
+    || finish_fail "inject Enter failed (key injection error)"
+
+sleep "$POST_ENTER_WAIT"
+capture_evidence "post-enter"
+
+# Save the full tail-since excerpt as evidence regardless of outcome.
+tail_since > "$SERIAL_EXCERPT" || true
+
+# =============================================================================
+# (g)/(h) Honest oracle: PASS requires BOTH bterm's own startup config line AND
+#         its child-shell spawn line — i.e. the terminal launched AND loaded a
+#         working shell. Launcher-only, or a half-initialized bterm, is a FAIL.
+# =============================================================================
+SAW_BTERM_CONFIG=0
+SAW_BTERM_SHELL=0
+if tail_has -F "$BTERM_CONFIG_MARKER"; then SAW_BTERM_CONFIG=1; fi
+if tail_has -F "$BTERM_SHELL_MARKER"; then SAW_BTERM_SHELL=1; fi
+
+if [[ "$SAW_BTERM_CONFIG" -eq 1 && "$SAW_BTERM_SHELL" -eq 1 ]]; then
+    log "terminal launched + loaded: saw '$BTERM_CONFIG_MARKER' AND '$BTERM_SHELL_MARKER'"
+    finish_pass
+fi
+
+# A kernel fault during the Enter->fork/exec->bterm path (e.g. EC=0xe Illegal
+# Execution State on a secondary CPU) presents as "launcher opened, bterm never
+# came up". Detect + report it distinctly from a benign no-launch.
+if tail_has -E "$KERNEL_FAULT_RE"; then
+    finish_fail "KERNEL FAULT during terminal launch: $(first_fault_line) — real Breenix crash on the bterm fork/exec path (clone-exec/TTBR0 territory), NOT a harness/timing issue"
+fi
+
+if [[ "$SAW_BTERM_CONFIG" -eq 1 ]]; then
+    finish_fail "bterm started ('$BTERM_CONFIG_MARKER') but did not spawn its shell ('$BTERM_SHELL_MARKER') — terminal did not finish loading"
+elif [[ "$SAW_BTERM_SHELL" -eq 1 ]]; then
+    finish_fail "saw '$BTERM_SHELL_MARKER' but no '$BTERM_CONFIG_MARKER' (inconsistent evidence)"
+else
+    finish_fail "launcher opened but terminal did not launch (no '$BTERM_CONFIG_MARKER' after Enter)"
+fi
diff --git a/userspace/programs/src/bwm.rs b/userspace/programs/src/bwm.rs
index e8ddbde1..d9d25a91 100644
--- a/userspace/programs/src/bwm.rs
+++ b/userspace/programs/src/bwm.rs
@@ -396,12 +396,26 @@ impl HotkeyManager {
     /// Called every frame with the current modifier bitmask and whether a
     /// non-modifier key was pressed this frame. Returns an action if a
     /// hotkey matched.
-    fn update(&mut self, current_mods: u8, key_pressed: Option) -> Option {
+    ///
+    /// `super_taps` is the count of Super press-edges latched in the kernel HID
+    /// path since the previous frame (op=31, read-and-clear). Because the latch
+    /// captures every 0→1 SUPER transition the instant the HID report arrives,
+    /// it recovers taps whose entire ~30ms high window fell between two bursty
+    /// compositor-wait polls — which the level-based edge detection would miss.
+    /// SUPER tap counting is driven exclusively by this latch so each physical
+    /// press is counted exactly once (no double-count vs. release detection).
+    fn update(
+        &mut self,
+        current_mods: u8,
+        key_pressed: Option,
+        super_taps: u32,
+    ) -> Option {
         if self.cooldown > 0 {
             self.cooldown -= 1;
         }
 
-        // Track if any non-modifier key was pressed while modifiers are held
+        // Track if any non-modifier key was pressed while modifiers are held.
+        // A combo (modifier + key) must NOT trigger the no-key double-tap launcher.
         if key_pressed.is_some() && current_mods != 0 {
             self.combo_used = true;
         }
@@ -424,26 +438,78 @@ impl HotkeyManager {
             }
         }
 
-        // Detect modifier-only transitions for multi-tap detection
-        // Check each modifier bit for press/release edges
-        for &mod_bit in &[modifier::SUPER, modifier::ALT, modifier::CTRL, modifier::SHIFT] {
+        // ── Super multi-tap detection driven by the kernel press-edge latch ──
+        // Each latched press-edge is one physical tap. `combo_used` only matters
+        // while a modifier is held: if none was down at the previous poll and no
+        // key arrived this frame, a leftover flag is stale and must not eat taps.
+        if super_taps > 0 && prev == 0 && key_pressed.is_none() {
+            self.combo_used = false;
+        }
+        for _ in 0..super_taps {
+            if self.combo_used {
+                // Press belongs to a live combo: reset the sequence, never fire.
+                self.tap_count = 0;
+                self.tap_release_ns = 0;
+                continue;
+            }
+            self.tap_modifier = modifier::SUPER;
+            let now_ns = match libbreenix::time::now_monotonic() {
+                Ok(ts) => ts.tv_sec as u64 * 1_000_000_000 + ts.tv_nsec as u64,
+                Err(_) => 0,
+            };
+            // Continue a sequence only if the previous tap was within the 400ms
+            // window; otherwise this tap starts a fresh sequence.
+            if self.tap_count > 0
+                && now_ns.saturating_sub(self.tap_release_ns) < 400_000_000
+            {
+                self.tap_count += 1;
+            } else {
+                self.tap_count = 1;
+            }
+            self.tap_release_ns = now_ns;
+
+            // Fire the matching multi-tap binding (e.g. double-tap Super).
+            if self.cooldown == 0 {
+                for binding in &self.bindings {
+                    if binding.key == 0
+                        && binding.modifiers == modifier::SUPER
+                        && binding.taps == self.tap_count
+                    {
+                        self.cooldown = 30;
+                        self.tap_count = 0;
+                        self.tap_release_ns = 0;
+                        return Some(binding.action.clone());
+                    }
+                }
+            }
+        }
+
+        // Super release edge: taps are counted at press time, so a hold that
+        // turned into a combo is voided here; then the combo flag is cleared.
+        let super_was = (prev & modifier::SUPER) != 0;
+        let super_now = (current_mods & modifier::SUPER) != 0;
+        if !super_now && super_was {
+            if self.combo_used && self.tap_modifier == modifier::SUPER {
+                self.tap_count = 0;
+                self.tap_release_ns = 0;
+            }
+            self.combo_used = false;
+        }
+
+        // ── Multi-tap detection for ALT / CTRL / SHIFT via level edges ──
+        // (Super is handled above by the latch; these keep release-edge counting.)
+        for &mod_bit in &[modifier::ALT, modifier::CTRL, modifier::SHIFT] {
             let was = (prev & mod_bit) != 0;
             let now = (current_mods & mod_bit) != 0;
 
             if now && !was {
-                // Modifier just pressed
-                if mod_bit == self.tap_modifier {
-                    // Same modifier as we're tracking — continue counting
-                } else {
-                    // Different modifier — reset
+                if mod_bit != self.tap_modifier {
                     self.tap_modifier = mod_bit;
                     self.tap_count = 0;
                 }
                 self.combo_used = false;
             } else if !now && was {
-                // Modifier just released
                 if mod_bit == self.tap_modifier && !self.combo_used {
-                    // Clean release (no other keys pressed during hold)
                     let now_ns = match libbreenix::time::now_monotonic() {
                         Ok(ts) => ts.tv_sec as u64 * 1_000_000_000 + ts.tv_nsec as u64,
                         Err(_) => 0,
@@ -456,7 +522,6 @@ impl HotkeyManager {
                     }
                     self.tap_release_ns = now_ns;
 
-                    // Check for multi-tap bindings
                     if self.cooldown == 0 {
                         for binding in &self.bindings {
                             if binding.key == 0
@@ -470,12 +535,9 @@ impl HotkeyManager {
                             }
                         }
                     }
-                } else {
-                    // Dirty release (combo was used) — reset
-                    if mod_bit == self.tap_modifier {
-                        self.tap_count = 0;
-                        self.tap_release_ns = 0;
-                    }
+                } else if mod_bit == self.tap_modifier {
+                    self.tap_count = 0;
+                    self.tap_release_ns = 0;
                 }
             }
         }
@@ -491,6 +553,38 @@ fn trim(s: &[u8]) -> &[u8] {
     &s[start..end]
 }
 
+/// Read-and-clear the kernel's latched count of Super press-edges (FBDRAW op=31).
+///
+/// The kernel HID path increments a lock-free atomic on every 0→1 SUPER
+/// transition the instant the report arrives, so a tap whose high window fell
+/// entirely between two compositor-wait polls is still counted. This drains
+/// that latch so missed taps reach the double-tap detector.
+#[cfg(target_arch = "aarch64")] +fn take_super_tap_count() -> u32 { + use libbreenix::graphics::FbDrawCmd; + use libbreenix::syscall::nr; + let cmd = FbDrawCmd { + op: 31, + p1: 0, + p2: 0, + p3: 0, + p4: 0, + color: 0, + }; + let ret = + unsafe { libbreenix::raw::syscall1(nr::FBDRAW, &cmd as *const FbDrawCmd as u64) as i64 }; + if ret < 0 { + 0 + } else { + ret as u32 + } +} + +#[cfg(not(target_arch = "aarch64"))] +fn take_super_tap_count() -> u32 { + 0 +} + // ─── Resize Edge ──────────────────────────────────────────────────────────── #[derive(Clone, Copy, PartialEq)] @@ -1538,8 +1632,13 @@ fn main() { }; // ── 0b. Poll modifier state and check hotkeys ── + // Drain the kernel's latched Super press-edge count (op=31) every frame + // — including frames where compositor_wait was skipped — so a tap that + // completed between polls is fed into double-tap detection and the + // keyboard-ready latch can't busy-loop compositor_wait. + let super_taps = take_super_tap_count(); let current_mods = graphics::poll_modifier_state() as u8; - if let Some(action) = hotkey_mgr.update(current_mods, None) { + if let Some(action) = hotkey_mgr.update(current_mods, None, super_taps) { match &action { HotkeyAction::FocusNext => { if !windows.is_empty() {