@@ -86,7 +86,8 @@ def _save_baselines(data: dict) -> None:
8686
8787def _check_or_update (name : str , timing : dict , update : bool ,
8888 fail_ratio : float = FAIL_RATIO ,
89- warn_ratio : float = WARN_RATIO ) -> None :
89+ warn_ratio : float = WARN_RATIO ,
90+ ignore_hardware : bool = False ) -> None :
9091 """Assert timing is within threshold of stored baseline, or write it.
9192
9293 Parameters
@@ -99,6 +100,15 @@ def _check_or_update(name: str, timing: dict, update: bool,
99100 benchmarks use 2.5× because Playwright mouse-event timing
100101 is more variable under OS scheduler load.
101102 warn_ratio : ratio above which a warning (not failure) is emitted.
103+ ignore_hardware : when True, treat the current machine as matching the
104+ baseline host and apply full fail/warn behaviour.
105+
106+ Hardware matching
107+ -----------------
108+ When the current hostname differs from ``_meta.host`` in the baseline file
109+ the test still runs and compares, but any result that would normally be a
110+ *failure* is downgraded to a *warning* (a baseline with no recorded host is
111+ treated as matching). Pass ``--ignore-hardware`` to restore full fail behaviour regardless of hostname.
102112 """
103113 if timing is None :
104114 pytest .skip (f"[{ name } ] No timing data returned (panel not found?)" )
@@ -127,18 +137,33 @@ def _check_or_update(name: str, timing: dict, update: bool,
127137 f"[{ name } ] No baseline — run with --update-benchmarks to create one"
128138 )
129139
140+ # Determine whether we're on the same hardware as the baseline.
141+ meta = baselines .get ("_meta" , {})
142+ baseline_host = meta .get ("host" )
143+ current_host = socket .gethostname ()
144+ hw_match = ignore_hardware or not baseline_host or (baseline_host == current_host )
145+ hw_note = (
146+ ""
147+ if hw_match
148+ else f" [different hardware: baseline={ baseline_host !r} , current={ current_host !r} ]"
149+ )
150+
130151 baseline = baselines [name ]
131152 ratio = timing ["mean_ms" ] / baseline ["mean_ms" ]
132153
133154 if ratio > fail_ratio :
134- pytest . fail (
155+ msg = (
135156 f"[{ name } ] REGRESSION: mean { timing ['mean_ms' ]:.2f} ms vs "
136- f"baseline { baseline ['mean_ms' ]:.2f} ms ({ ratio :.2f} ×)"
157+ f"baseline { baseline ['mean_ms' ]:.2f} ms ({ ratio :.2f} ×){ hw_note } "
137158 )
138- if ratio > warn_ratio :
159+ if hw_match :
160+ pytest .fail (msg )
161+ else :
162+ warnings .warn (msg , stacklevel = 2 )
163+ elif ratio > warn_ratio :
139164 warnings .warn (
140165 f"[{ name } ] Perf degraded: mean { timing ['mean_ms' ]:.2f} ms vs "
141- f"baseline { baseline ['mean_ms' ]:.2f} ms ({ ratio :.2f} ×)" ,
166+ f"baseline { baseline ['mean_ms' ]:.2f} ms ({ ratio :.2f} ×){ hw_note } " ,
142167 stacklevel = 2 ,
143168 )
144169
@@ -163,7 +188,7 @@ def _check_or_update(name: str, timing: dict, update: bool,
163188 _IMSHOW_SIZES ,
164189 ids = [f"{ h } x{ w } " for h , w , _ in _IMSHOW_SIZES ],
165190)
166- def test_bench_imshow (h , w , is_slow , bench_page , update_benchmarks , run_slow ):
191+ def test_bench_imshow (h , w , is_slow , bench_page , update_benchmarks , run_slow , ignore_hardware ):
167192 """Render-time benchmark: imshow with {h}×{w} image data."""
168193 if is_slow and not run_slow :
169194 pytest .skip (f"Skipping { h } ×{ w } in fast CI — pass --run-slow to include" )
@@ -189,7 +214,8 @@ def test_bench_imshow(h, w, is_slow, bench_page, update_benchmarks, run_slow):
189214 timeout = timeout_ms ,
190215 )
191216
192- _check_or_update (f"js_imshow_{ h } x{ w } " , timing , update_benchmarks )
217+ _check_or_update (f"js_imshow_{ h } x{ w } " , timing , update_benchmarks ,
218+ ignore_hardware = ignore_hardware )
193219
194220
195221# ── 1D plot benchmarks ────────────────────────────────────────────────────────
@@ -198,7 +224,7 @@ def test_bench_imshow(h, w, is_slow, bench_page, update_benchmarks, run_slow):
198224
199225
200226@pytest .mark .parametrize ("n_pts" , _PLOT1D_SIZES , ids = [str (n ) for n in _PLOT1D_SIZES ])
201- def test_bench_plot1d (n_pts , bench_page , update_benchmarks ):
227+ def test_bench_plot1d (n_pts , bench_page , update_benchmarks , ignore_hardware ):
202228 """Render-time benchmark: plot1d with {n_pts} points."""
203229 rng = np .random .default_rng (1 )
204230 fig , ax = apl .subplots (1 , 1 , figsize = (640 , 320 ))
@@ -215,7 +241,8 @@ def test_bench_plot1d(n_pts, bench_page, update_benchmarks):
215241 n_samples = 15 ,
216242 )
217243
218- _check_or_update (f"js_plot1d_{ n_pts } pts" , timing , update_benchmarks )
244+ _check_or_update (f"js_plot1d_{ n_pts } pts" , timing , update_benchmarks ,
245+ ignore_hardware = ignore_hardware )
219246
220247
221248# ── pcolormesh benchmarks ─────────────────────────────────────────────────────
@@ -224,7 +251,7 @@ def test_bench_plot1d(n_pts, bench_page, update_benchmarks):
224251
225252
226253@pytest .mark .parametrize ("n" , _MESH_SIZES , ids = [f"{ n } x{ n } " for n in _MESH_SIZES ])
227- def test_bench_pcolormesh (n , bench_page , update_benchmarks ):
254+ def test_bench_pcolormesh (n , bench_page , update_benchmarks , ignore_hardware ):
228255 """Render-time benchmark: pcolormesh with {n}×{n} grid."""
229256 rng = np .random .default_rng (2 )
230257 xe = np .linspace (0.0 , 1.0 , n + 1 )
@@ -245,12 +272,13 @@ def test_bench_pcolormesh(n, bench_page, update_benchmarks):
245272 n_samples = 15 ,
246273 )
247274
248- _check_or_update (f"js_pcolormesh_{ n } x{ n } " , timing , update_benchmarks )
275+ _check_or_update (f"js_pcolormesh_{ n } x{ n } " , timing , update_benchmarks ,
276+ ignore_hardware = ignore_hardware )
249277
250278
251279# ── 3D surface benchmark ──────────────────────────────────────────────────────
252280
253- def test_bench_plot3d (bench_page , update_benchmarks ):
281+ def test_bench_plot3d (bench_page , update_benchmarks , ignore_hardware ):
254282 """Render-time benchmark: 3D surface (rotation interaction path)."""
255283 x = np .linspace (- 2.0 , 2.0 , 48 )
256284 y = np .linspace (- 2.0 , 2.0 , 48 )
@@ -272,13 +300,14 @@ def test_bench_plot3d(bench_page, update_benchmarks):
272300 n_samples = 15 ,
273301 )
274302
275- _check_or_update ("js_plot3d_48x48" , timing , update_benchmarks )
303+ _check_or_update ("js_plot3d_48x48" , timing , update_benchmarks ,
304+ ignore_hardware = ignore_hardware )
276305
277306
278307# ── bar chart benchmark ───────────────────────────────────────────────────────
279308
280309@pytest .mark .parametrize ("n_bars" , [10 , 100 ], ids = ["10bars" , "100bars" ])
281- def test_bench_bar (n_bars , bench_page , update_benchmarks ):
310+ def test_bench_bar (n_bars , bench_page , update_benchmarks , ignore_hardware ):
282311 """Render-time benchmark: bar chart with {n_bars} bars."""
283312 rng = np .random .default_rng (3 )
284313 fig , ax = apl .subplots (1 , 1 , figsize = (640 , 320 ))
@@ -295,12 +324,13 @@ def test_bench_bar(n_bars, bench_page, update_benchmarks):
295324 n_samples = 15 ,
296325 )
297326
298- _check_or_update (f"js_bar_{ n_bars } bars" , timing , update_benchmarks )
327+ _check_or_update (f"js_bar_{ n_bars } bars" , timing , update_benchmarks ,
328+ ignore_hardware = ignore_hardware )
299329
300330
301331# ── interaction: 2D pan ───────────────────────────────────────────────────────
302332
303- def test_bench_interaction_2d_pan (bench_page , update_benchmarks ):
333+ def test_bench_interaction_2d_pan (bench_page , update_benchmarks , ignore_hardware ):
304334 """Interaction benchmark: 2D pan drag (20 mousemove events on 512² image)."""
305335 rng = np .random .default_rng (4 )
306336 fig , ax = apl .subplots (1 , 1 , figsize = (512 + _PAD_L + _PAD_R ,
@@ -343,9 +373,9 @@ def test_bench_interaction_2d_pan(bench_page, update_benchmarks):
343373
344374 timing = page .evaluate (f"() => window._aplTiming && window._aplTiming['{ panel_id } ']" )
345375 _check_or_update ("js_interaction_2d_pan" , timing , update_benchmarks ,
346- fail_ratio = 2.5 , warn_ratio = 1.75 )
376+ fail_ratio = 2.5 , warn_ratio = 1.75 , ignore_hardware = ignore_hardware )
347377
348- def test_bench_interaction_2d_zoom (bench_page , update_benchmarks ):
378+ def test_bench_interaction_2d_zoom (bench_page , update_benchmarks , ignore_hardware ):
349379 """Interaction benchmark: 2D wheel zoom (20 wheel events on 512² image)."""
350380 rng = np .random .default_rng (5 )
351381 fig , ax = apl .subplots (1 , 1 , figsize = (512 + _PAD_L + _PAD_R ,
@@ -376,9 +406,9 @@ def test_bench_interaction_2d_zoom(bench_page, update_benchmarks):
376406
377407 timing = page .evaluate (f"() => window._aplTiming && window._aplTiming['{ panel_id } ']" )
378408 _check_or_update ("js_interaction_2d_zoom" , timing , update_benchmarks ,
379- fail_ratio = 2.5 , warn_ratio = 1.75 )
409+ fail_ratio = 2.5 , warn_ratio = 1.75 , ignore_hardware = ignore_hardware )
380410
381- def test_bench_interaction_1d_pan (bench_page , update_benchmarks ):
411+ def test_bench_interaction_1d_pan (bench_page , update_benchmarks , ignore_hardware ):
382412 """Interaction benchmark: 1D pan drag (20 mousemove events, 10K points)."""
383413 rng = np .random .default_rng (6 )
384414 pw , ph = 640 , 320
@@ -414,6 +444,4 @@ def test_bench_interaction_1d_pan(bench_page, update_benchmarks):
414444
415445 timing = page .evaluate (f"() => window._aplTiming && window._aplTiming['{ panel_id } ']" )
416446 _check_or_update ("js_interaction_1d_pan" , timing , update_benchmarks ,
417- fail_ratio = 2.5 , warn_ratio = 1.75 )
418-
419-
447+ fail_ratio = 2.5 , warn_ratio = 1.75 , ignore_hardware = ignore_hardware )
0 commit comments