diff --git a/backlog/tasks/task-260 - Fix-packaged-app-hangs-on-Restoring-session...-due-to-fsevents-less-chokidar-fd-exhaustion.md b/backlog/tasks/task-260 - Fix-packaged-app-hangs-on-Restoring-session...-due-to-fsevents-less-chokidar-fd-exhaustion.md new file mode 100644 index 00000000..5a756831 --- /dev/null +++ b/backlog/tasks/task-260 - Fix-packaged-app-hangs-on-Restoring-session...-due-to-fsevents-less-chokidar-fd-exhaustion.md @@ -0,0 +1,31 @@ +--- +id: TASK-260 +title: >- + Fix: packaged app hangs on 'Restoring session...' due to fsevents-less + chokidar fd exhaustion +status: Done +assignee: [] +created_date: '2026-06-23 11:35' +updated_date: '2026-06-23 11:35' +labels: [] +dependencies: [] +--- + +## Description + + +Packaged (dist) builds hung forever on the SessionLoading screen while 'npm start' worked. Root cause: chokidar's macOS FSEvents backend needs the native 'fsevents' optionalDependency, which was present in dev node_modules but never bundled into the packaged app by the forge postPackage hook. Without it, chokidar (with usePolling:false) falls back to one fs.watch fd per directory; on accounts with thousands of ~/.copilot session dirs (this user: 4,972) that exhausts the file-descriptor limit (EMFILE: too many open files, watch), saturating the main event loop so renderer startup IPC stalls and isRestoring never clears. Finder/Dock launches inherit a 256 fd soft limit, compounding it. Fix: bundle fsevents in forge.config.ts postPackage (like node-pty/better-sqlite3); externalize fsevents in vite.main.config.ts; add utils/fsevents.ts canUseNativeRecursiveWatch() and use it in copilot- and claude-code-session-watcher to fall back to bounded stat-polling when fsevents is unavailable (defense-in-depth so a missing native module degrades instead of hanging). 
+ + +## Acceptance Criteria + +- [x] #1 Packaged macOS app bundles fsevents and copilot-watcher logs usePolling=false with zero EMFILE +- [x] #2 When fsevents is absent, watchers degrade to usePolling=true (bounded polling) without an EMFILE flood or hang +- [x] #3 Packaged app loads past 'Restoring session...' and restores the saved multi-pane session + + +## Final Summary + + +Shipped. Bundled fsevents into the packaged app (forge.config.ts postPackage), externalized it (vite.main.config.ts), and added utils/fsevents.ts canUseNativeRecursiveWatch() used by both session watchers to fall back to bounded polling when fsevents can't load. Verified on a real package: usePolling=false + 0 EMFILE with fsevents, graceful usePolling=true fallback without it; both watchers reach 'ready' and the app restores its 4-pane session past the loading screen. Also gated app quit in main.ts (before-quit and SIGINT/SIGTERM/SIGHUP now run shutdownResources first) so the chokidar/fsevents watchers are closed before Node env teardown, avoiding a SIGABRT on exit. 115 unit tests pass. + diff --git a/forge.config.ts b/forge.config.ts index 23194e4e..7f5b80d5 100644 --- a/forge.config.ts +++ b/forge.config.ts @@ -98,6 +98,22 @@ const config: ForgeConfig = { } } + // fsevents is chokidar's macOS native backend (optionalDependency). When + // present, chokidar uses a SINGLE efficient FSEvents watcher for the whole + // session-state tree. When absent, chokidar with `usePolling:false` falls + // back to per-directory fs.watch — which opens one fd per watched dir. On + // accounts with thousands of ~/.copilot session dirs that instantly + // exhausts file descriptors ("EMFILE: too many open files, watch"), + // saturating the main event loop so renderer startup IPC stalls and the + // app hangs forever on "Restoring session...". Bundling fsevents restores + // the FSEvents path. macOS-only: the module simply won't exist on Win/Linux + // build machines, so the existsSync guard skips it there. 
+ const fseventsSrc = path.join(__dirname, 'node_modules', 'fsevents'); + const fseventsDest = path.join(appDir, 'node_modules', 'fsevents'); + if (fs.existsSync(fseventsSrc) && !fs.existsSync(fseventsDest)) { + await fs.copy(fseventsSrc, fseventsDest); + } + // Copy the assets folder (icons, clawpilot.png, etc.) into the // packaged app. Main-process code resolves these via app.getAppPath() // + assets/ for notification icons and similar runtime assets. diff --git a/package-lock.json b/package-lock.json index e9653c2c..98f105fc 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "tmax", - "version": "1.11.0", + "version": "1.11.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "tmax", - "version": "1.11.0", + "version": "1.11.1", "hasInstallScript": true, "license": "MIT", "dependencies": { diff --git a/src/main/claude-code-session-watcher.ts b/src/main/claude-code-session-watcher.ts index ff653eed..04823ef5 100644 --- a/src/main/claude-code-session-watcher.ts +++ b/src/main/claude-code-session-watcher.ts @@ -2,6 +2,7 @@ import * as fs from 'node:fs'; import * as path from 'node:path'; import * as chokidar from 'chokidar'; import type { FSWatcher } from 'chokidar'; +import { canUseNativeRecursiveWatch } from './utils/fsevents'; const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\.jsonl$/i; @@ -119,11 +120,18 @@ export class ClaudeCodeSessionWatcher { } private async startNativeWithHotPoll(): Promise { - console.log(`[claude-code-watcher] start() basePath=${this.basePath} mode=native+hotpoll`); + // See copilot-session-watcher.ts / utils/fsevents.ts: a non-polling + // recursive watch on macOS without fsevents opens one fd per dir and + // exhausts the descriptor limit (EMFILE). Fall back to bounded polling. 
+ const nativeSafe = canUseNativeRecursiveWatch(); + console.log(`[claude-code-watcher] start() basePath=${this.basePath} mode=native+hotpoll usePolling=${!nativeSafe}`); + if (!nativeSafe) { + console.warn('[claude-code-watcher] fsevents unavailable — using bounded polling to avoid fd exhaustion (EMFILE)'); + } try { this.watcher = chokidar.watch(this.basePath, { - usePolling: false, + usePolling: !nativeSafe, // Depth 2: ~/.claude/projects//.jsonl depth: 2, ignoreInitial: true, diff --git a/src/main/copilot-session-watcher.ts b/src/main/copilot-session-watcher.ts index 2cb8caa5..5816b4a1 100644 --- a/src/main/copilot-session-watcher.ts +++ b/src/main/copilot-session-watcher.ts @@ -2,6 +2,7 @@ import * as fs from 'node:fs'; import * as path from 'node:path'; import * as chokidar from 'chokidar'; import type { FSWatcher } from 'chokidar'; +import { canUseNativeRecursiveWatch } from './utils/fsevents'; export interface CopilotWatcherCallbacks { onEventsChanged: (sessionId: string) => void; @@ -122,11 +123,22 @@ export class CopilotSessionWatcher { * directory-level watch plus a tiny per-file mtime poll over the hot set. */ private async startNativeWithHotPoll(): Promise { - console.log(`[copilot-watcher] start() basePath=${this.basePath} mode=native+hotpoll`); + // On macOS, a non-polling recursive watch is only safe when fsevents loads + // (single FSEvents watcher). Without it, chokidar opens one fs.watch fd per + // session dir and exhausts the descriptor limit (EMFILE) on accounts with + // thousands of sessions, wedging startup. Fall back to bounded stat-polling + // (no persistent fds) in that case. See utils/fsevents.ts for the full + // rationale. After the packaging fix bundles fsevents this fallback should + // never trigger on macOS. 
+ const nativeSafe = canUseNativeRecursiveWatch(); + console.log(`[copilot-watcher] start() basePath=${this.basePath} mode=native+hotpoll usePolling=${!nativeSafe}`); + if (!nativeSafe) { + console.warn('[copilot-watcher] fsevents unavailable — using bounded polling to avoid fd exhaustion (EMFILE)'); + } try { this.watcher = chokidar.watch(this.basePath, { - usePolling: false, + usePolling: !nativeSafe, // Depth 2 covers ~/.copilot//(events.jsonl|workspace.yaml). // chokidar's depth counts recursion levels; depth: 2 is generous and // keeps us from accidentally missing files if Copilot CLI ever puts a diff --git a/src/main/main.ts b/src/main/main.ts index b70564c3..fefeb773 100644 --- a/src/main/main.ts +++ b/src/main/main.ts @@ -1766,24 +1766,62 @@ app.whenReady().then(() => { }); }); +// Idempotent teardown of everything that holds an OS resource or a native +// handle. Crucially this closes the chokidar/fsevents watchers: fsevents' +// N-API threadsafe function must be released while the libuv loop is still +// alive. If it's released during Node env teardown (node::Stop) it aborts +// (SIGABRT) in fse_instance_destroy -> napi_release_threadsafe_function -> +// uv_mutex_lock. Calling watcher.close()/stop() here (via Native.stop) +// releases it early and safely. 
let shutdownPromise: Promise<void> | null = null; // in-flight or finished teardown; null until first requested

/**
 * Tears down every OS resource / native handle held by the main process.
 *
 * The teardown runs at most once: the first call starts it and every later
 * (or concurrent) call receives the same promise, so a caller that awaits the
 * result always waits for the real teardown instead of returning early.
 *
 * Every step is best-effort. A failing step is logged and the remaining steps
 * still run, so the returned promise does not reject because of a step.
 *
 * Note: this deliberately does NOT delete the clipboard temp dir. Image paths
 * inserted into the terminal stay clickable across restarts only if the files
 * survive the close. Stale files are reaped by the 6h per-file sweep in
 * sweepStaleClipboardDirs() on next startup.
 *
 * @returns A promise that settles once all teardown steps have been attempted.
 */
function shutdownResources(): Promise<void> {
  if (shutdownPromise) return shutdownPromise;
  shutdownPromise = (async (): Promise<void> => {
    try { ptyManager?.killAll(); } catch (err: unknown) { logTeardownFailure('ptyManager.killAll', err); }
    try { await sessionFileWatcher?.close(); } catch (err: unknown) { logTeardownFailure('sessionFileWatcher.close', err); }
    sessionFileWatcher = null;
    try { await copilotWatcher?.stop(); } catch (err: unknown) { logTeardownFailure('copilotWatcher.stop', err); }
    try { copilotMonitor?.dispose(); } catch (err: unknown) { logTeardownFailure('copilotMonitor.dispose', err); }
    try { await claudeCodeWatcher?.stop(); } catch (err: unknown) { logTeardownFailure('claudeCodeWatcher.stop', err); }
    try { claudeCodeMonitor?.dispose(); } catch (err: unknown) { logTeardownFailure('claudeCodeMonitor.dispose', err); }
    try { await wslSessionManager?.stop(); } catch (err: unknown) { logTeardownFailure('wslSessionManager.stop', err); }
    try { versionChecker?.stop(); } catch (err: unknown) { logTeardownFailure('versionChecker.stop', err); }
    try { clearNotificationCooldowns(); } catch (err: unknown) { logTeardownFailure('clearNotificationCooldowns', err); }
  })();
  return shutdownPromise;
}

/** Logs a failed best-effort teardown step; never throws. */
function logTeardownFailure(step: string, err: unknown): void {
  const reason = err instanceof Error ? err.message : String(err);
  console.warn(`[shutdown] ${step} failed: ${reason}`);
}

/** Longest a gated quit waits for teardown before quitting anyway (ms). */
const QUIT_TEARDOWN_TIMEOUT_MS = 2000;

// Quit is gated so async resource teardown always finishes before the process
// exits. Without this, Cmd-Q / SIGTERM tears down the Node env with the
// fsevents watcher still active and the app crashes (SIGABRT) on exit. Once
// the watchers are closed (TSFN released) we re-issue the quit and let the
// normal teardown run — this preserves will-quit and the renderer's
// beforeunload session-save.
//
// The gate has three phases rather than a single flag: a quit request that
// arrives WHILE teardown is still in flight (second Cmd-Q, a signal, ...)
// must be swallowed too, otherwise it would slip through and exit with the
// watchers still open — exactly the crash the gate exists to prevent.
type QuitPhase = 'idle' | 'tearing-down' | 'released';
let quitPhase: QuitPhase = 'idle';

app.on('before-quit', (event) => {
  if (quitPhase === 'released') return; // teardown finished or timed out — let the real quit proceed
  event.preventDefault();
  if (quitPhase === 'tearing-down') return; // duplicate request; the pending teardown re-issues the quit
  quitPhase = 'tearing-down';

  // Don't let a stalled watcher.close() hang the quit forever.
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<void>((resolve) => {
    timer = setTimeout(resolve, QUIT_TEARDOWN_TIMEOUT_MS);
  });

  void Promise.race([shutdownResources(), timeout])
    .catch((err: unknown) => logTeardownFailure('shutdownResources', err))
    .finally(() => {
      clearTimeout(timer);
      quitPhase = 'released';
      app.quit();
    });
});

app.on('will-quit', () => {
  try { globalShortcut.unregisterAll(); } catch { /* ignore */ }
});

app.on('window-all-closed', () => {
  // Route through before-quit so watcher teardown happens before the env is
  // torn down (see shutdownResources). app.quit() re-enters before-quit.
  app.quit();
});

// External termination (kill, parent terminal closing, CI) would otherwise
// tear down the Node env with the fsevents watcher still active and crash on
// exit. Route signals through the same gated quit so cleanup always runs.
for (const sig of ['SIGINT', 'SIGTERM', 'SIGHUP'] as const) {
  process.on(sig, () => {
    app.quit();
  });
}

// ---- src/main/utils/fsevents.ts (new file) ----
// Shared probe for chokidar's macOS FSEvents backend.
//
// chokidar only uses the single, cheap FSEvents watcher for a directory tree
// when the native `fsevents` module is loadable. When it isn't (e.g. it wasn't
// bundled into a packaged build) AND we pass `usePolling: false`, chokidar
// silently falls back to opening one `fs.watch` file descriptor per directory.
// On accounts with thousands of ~/.copilot / ~/.claude session dirs that
// instantly exhausts the process file-descriptor limit:
//
//   Error: EMFILE: too many open files, watch
//
// The flood saturates the main event loop, so the renderer's startup IPC stalls
// and the app hangs forever on "Restoring session...". (Finder/Dock-launched
// apps make it worse: they inherit a 256 fd soft limit, vs the high ulimit a
// terminal-launched `npm start` gets.)
//
// `canUseNativeRecursiveWatch()` lets callers decide whether forcing
// `usePolling: false` is safe. The packaged app now bundles `fsevents`, so on
// macOS this returns true; this probe is the safety net that keeps a missing
// fsevents from turning into a hang — we degrade to bounded stat-polling
// (no persistent fds) instead.

/** Memoized probe result; `null` until the first probe has run. */
let cached: boolean | null = null;

/**
 * Whether the native `fsevents` module can be required in this process.
 *
 * The probe runs once and its result is cached for the process lifetime.
 * On macOS a failed load is logged with the underlying reason (once, because
 * of the cache) so a missing module can be told apart from a native-load
 * failure; on other platforms the failure is expected and stays silent.
 *
 * @returns `true` if `require('fsevents')` succeeded, `false` otherwise.
 */
export function isFseventsLoadable(): boolean {
  if (cached !== null) return cached;
  try {
    // Resolved at runtime from node_modules (marked external to the bundler).
    require('fsevents');
    cached = true;
  } catch (err: unknown) {
    cached = false;
    if (process.platform === 'darwin') {
      const reason = err instanceof Error ? err.message : String(err);
      console.warn(`[fsevents] native module failed to load: ${reason}`);
    }
  }
  return cached;
}

/**
 * Whether chokidar can recursively watch a large directory tree with
 * `usePolling: false` without exhausting file descriptors.
 *
 * - Windows: `fs.watch` supports a single native recursive watch → safe.
 * - Linux: chokidar uses inotify (one cheap watch per dir, high default limit)
 *   — this was the pre-existing behavior, kept unchanged.
 * - macOS: safe ONLY when `fsevents` loads (single FSEvents watcher). Without
 *   it, per-directory `fs.watch` exhausts fds, so callers should fall back to
 *   polling instead.
 *
 * @returns `true` on every non-darwin platform; on darwin, the result of
 *   {@link isFseventsLoadable}.
 */
export function canUseNativeRecursiveWatch(): boolean {
  if (process.platform !== 'darwin') return true;
  return isFseventsLoadable();
}

/*
 * ---- vite.main.config.ts (hunk carried over unchanged from the diff) ----
 *
 * @@ -7,6 +7,11 @@ export default defineConfig({
 *          "node-pty",
 *          "chokidar",
 *          "better-sqlite3",
 * +        // Probed at runtime via require('fsevents') in utils/fsevents.ts to
 * +        // decide whether chokidar can safely use a non-polling native watch.
 * +        // Keep it external so the bundler doesn't try to inline the native
 * +        // module (it's darwin-only and absent on Win/Linux build machines).
 * +        "fsevents",
 *        ],
 *      },
 *    },
 */