From af2f66eb273957383f5eebf78d7f245a8f921ddc Mon Sep 17 00:00:00 2001 From: Miguel Angel Simon Sierra Date: Tue, 30 Jun 2026 17:40:19 -0700 Subject: [PATCH] fix(engine): retry probe on pollHfReady zero-duration timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Renders were failing outright with "[FrameCapture] Composition has zero duration. Runtime ready: false, ..." whenever window.__renderReady didn't flip true within playerReadyTimeout (45s) — most often under host contention (e.g. several renders running concurrently), never from a defect in the composition itself. Confirmed by re-running an affected composition standalone: it succeeded immediately (initMs ~3.5-4.4s vs. the 45s timeout it hit under concurrent load). The probe stage already retries once with a fresh browser session for exactly this class of "succeeds on retry" infra flakiness (frame detachment, disconnects, navigation timeouts, launch failures), but isTransientBrowserError didn't recognize this message, so it fell through to an immediate, unretried failure. Match "Composition has zero duration ... Runtime ready: false" as transient. Left the "Runtime ready: true" case (pollHfReady's fast-fail: no GSAP timeline and no data-duration) unmatched — that's a genuine authoring bug, not a timing fluke, and should keep failing fast. --- .../frameCapture-transientErrors.test.ts | 8 ++++ packages/engine/src/services/frameCapture.ts | 8 ++++ .../services/render/stages/probeStage.test.ts | 44 +++++++++++++++++++ 3 files changed, 60 insertions(+) diff --git a/packages/engine/src/services/frameCapture-transientErrors.test.ts b/packages/engine/src/services/frameCapture-transientErrors.test.ts index 3d4ebdd409..65abef0869 100644 --- a/packages/engine/src/services/frameCapture-transientErrors.test.ts +++ b/packages/engine/src/services/frameCapture-transientErrors.test.ts @@ -15,6 +15,10 @@ describe("isTransientBrowserError", () => { "Failed to launch the browser process! TROUBLESHOOTING: https://pptr.dev/troubleshooting", "connect ECONNREFUSED 127.0.0.1:9222", "Navigation timeout of 60000 ms exceeded", + // pollHfReady timed out before window.__renderReady flipped true — the + // classic symptom of a slow/contended host (e.g. several renders running + // concurrently); a fresh browser session on retry usually clears it. + "[FrameCapture] Composition has zero duration.\n Runtime ready: false, __player: true, __hf.seek: true, GSAP timeline: true, data-duration: 53.3s", ])("returns true for transient error: %s", (message) => { expect(isTransientBrowserError(new Error(message))).toBe(true); }); @@ -25,6 +29,10 @@ describe("isTransientBrowserError", () => { "Composition duration is 0", "SYSTEM_FONT_USED: -apple-system", "", + // The runtime finished initializing (renderReady: true) and still reports + // zero duration — a genuine authoring bug (no timeline, no data-duration), + // not a transient host hiccup. Must keep fast-failing without a retry. + "[FrameCapture] Composition has zero duration.\n Runtime ready: true, __player: true, __hf.seek: true, GSAP timeline: false, data-duration: not set", ])("returns false for non-transient error: %s", (message) => { expect(isTransientBrowserError(new Error(message))).toBe(false); }); diff --git a/packages/engine/src/services/frameCapture.ts b/packages/engine/src/services/frameCapture.ts index d6f81d0a21..45558d47f4 100644 --- a/packages/engine/src/services/frameCapture.ts +++ b/packages/engine/src/services/frameCapture.ts @@ -1975,6 +1975,14 @@ const TRANSIENT_BROWSER_ERROR_PATTERNS = [ /Failed to launch the browser process/i, /Navigation timeout of \d+ ms exceeded/i, /ECONNREFUSED/i, + // pollHfReady's own timeout — thrown when window.__renderReady never flips + // true within playerReadyTimeout. "Runtime ready: false" means init simply + // didn't finish in time (commonly a slow/contended host, e.g. several + // concurrent renders), which a fresh session usually clears on retry. This + // is distinct from the "Runtime ready: true" fast-fail case a few lines up + // in pollHfReady (no timeline + no data-duration) — that's a genuine + // authoring bug and intentionally NOT matched here, so it still fails fast. + /Composition has zero duration[\s\S]*Runtime ready: false/, ]; export function isTransientBrowserError(error: unknown): boolean { diff --git a/packages/producer/src/services/render/stages/probeStage.test.ts b/packages/producer/src/services/render/stages/probeStage.test.ts index e1124756f5..5ca0d64dce 100644 --- a/packages/producer/src/services/render/stages/probeStage.test.ts +++ b/packages/producer/src/services/render/stages/probeStage.test.ts @@ -69,6 +69,7 @@ mock.module("@hyperframes/engine", () => ({ // live in frameCapture-transientErrors.test.ts — update both if patterns change. isTransientBrowserError: (error: unknown) => { const msg = error instanceof Error ? error.message : String(error); + if (/Composition has zero duration[\s\S]*Runtime ready: false/.test(msg)) return true; return /Navigating frame was detached|Target closed|Session closed|browser has disconnected|Page crashed|Execution context was destroyed|Cannot find context with specified id|Failed to launch the browser process|Navigation timeout of \d+ ms exceeded|ECONNREFUSED/i.test( msg, ); @@ -337,6 +338,49 @@ describe("runProbeStage — transient browser error retry (#1687)", () => { expect(closeCaptureSessionCallCount).toBe(2); }); + it("retries once on a pollHfReady zero-duration timeout (renderReady: false) and succeeds", async () => { + resetRetryMocks(); + capturedCfgs.length = 0; + initializeSessionError = new Error( + "[FrameCapture] Composition has zero duration.\n Runtime ready: false, __player: true, __hf.seek: true, GSAP timeline: true, data-duration: 53.3s", + ); + initializeSessionFailUntilAttempt = 1; + + const { runProbeStage } = await import("./probeStage.js"); + const input = makeProbeInput({ cfgForceScreenshot: false, stageForceScreenshot: false }); + + const result = await runProbeStage(input); + + expect(initializeSessionCallCount).toBe(2); + expect(closeCaptureSessionCallCount).toBe(1); + expect(result.duration).toBe(5); + expect(result.probeSession).not.toBeNull(); + }); + + it("throws immediately on a permanent zero-duration error (renderReady: true — genuine authoring bug)", async () => { + resetRetryMocks(); + capturedCfgs.length = 0; + initializeSessionError = new Error( + "[FrameCapture] Composition has zero duration.\n Runtime ready: true, __player: true, __hf.seek: true, GSAP timeline: false, data-duration: not set", + ); + initializeSessionFailUntilAttempt = 999; + + const { runProbeStage } = await import("./probeStage.js"); + const input = makeProbeInput({ cfgForceScreenshot: false, stageForceScreenshot: false }); + + let caught: unknown; + try { + await runProbeStage(input); + } catch (err) { + caught = err; + } + + expect(caught).toBeInstanceOf(Error); + expect((caught as Error).message).toContain("Runtime ready: true"); + expect(initializeSessionCallCount).toBe(1); + expect(closeCaptureSessionCallCount).toBe(1); + }); + it("retries on a transient browser LAUNCH failure (createCaptureSession throws)", async () => { resetRetryMocks(); capturedCfgs.length = 0;