|
13 | 13 | [tech.v3.datatype :as dtype] |
14 | 14 | [tech.v3.datatype.functional :as dfn] |
15 | 15 | [tablecloth.api :as tc] |
16 | | - [scicloj.tableplot.v1.plotly :as plotly]) |
| 16 | + [scicloj.tableplot.v1.plotly :as plotly] |
| 17 | + [criterium.core :as crit]) |
17 | 18 | (:import [org.apache.commons.math3.transform FastFourierTransformer |
18 | 19 | DftNormalization |
19 | 20 | TransformType] |
|
289 | 290 |
|
290 | 291 | ;; Small signal (128 samples): |
291 | 292 |
|
292 | | -(def bench-small-128 |
293 | | - (let [n 1000] |
294 | | - {:apache-commons (benchmark-fft fft-apache-commons signal n) |
295 | | - :jdsp (benchmark-fft fft-jdsp signal n) |
296 | | - :jtransforms (benchmark-fft fft-jtransforms signal n) |
297 | | - :fastmath (benchmark-fft fft-fastmath signal n)})) |
298 | | - |
299 | | -(kind/table |
300 | | - [{:library "Apache Commons Math" |
301 | | - :time-per-fft (format "%.3f ms" (get-in bench-small-128 [:apache-commons :per-iter-ms]))} |
302 | | - {:library "jdsp" |
303 | | - :time-per-fft (format "%.3f ms" (get-in bench-small-128 [:jdsp :per-iter-ms]))} |
304 | | - {:library "JTransforms" |
305 | | - :time-per-fft (format "%.3f ms" (get-in bench-small-128 [:jtransforms :per-iter-ms]))} |
306 | | - {:library "fastmath" |
307 | | - :time-per-fft (format "%.3f ms" (get-in bench-small-128 [:fastmath :per-iter-ms]))}]) |
| 293 | +(let [bench-small-128 (let [n 1000] |
| 294 | + {:apache-commons (benchmark-fft fft-apache-commons signal n) |
| 295 | + :jdsp (benchmark-fft fft-jdsp signal n) |
| 296 | + :jtransforms (benchmark-fft fft-jtransforms signal n) |
| 297 | + :fastmath (benchmark-fft fft-fastmath signal n)})] |
| 298 | + (kind/table |
| 299 | + [{:library "Apache Commons Math" |
| 300 | + :time-per-fft (format "%.3f ms" (get-in bench-small-128 [:apache-commons :per-iter-ms]))} |
| 301 | + {:library "jdsp" |
| 302 | + :time-per-fft (format "%.3f ms" (get-in bench-small-128 [:jdsp :per-iter-ms]))} |
| 303 | + {:library "JTransforms" |
| 304 | + :time-per-fft (format "%.3f ms" (get-in bench-small-128 [:jtransforms :per-iter-ms]))} |
| 305 | + {:library "fastmath" |
| 306 | + :time-per-fft (format "%.3f ms" (get-in bench-small-128 [:fastmath :per-iter-ms]))}])) |
308 | 307 |
|
309 | 308 | ;; Larger signal (2^17 = 131,072 samples): |
310 | 309 |
|
311 | 310 | (def signal-large (generate-test-signal 131072)) |
312 | 311 |
|
313 | | -(def bench-large-131k |
314 | | - (let [n 10] |
315 | | - {:apache-commons (benchmark-fft fft-apache-commons signal-large n) |
316 | | - :jdsp (benchmark-fft fft-jdsp signal-large n) |
317 | | - :jtransforms (benchmark-fft fft-jtransforms signal-large n) |
318 | | - :fastmath (benchmark-fft fft-fastmath signal-large n)})) |
319 | | - |
320 | | -(kind/table |
321 | | - [{:library "Apache Commons Math" |
322 | | - :time-per-fft (format "%.3f ms" (get-in bench-large-131k [:apache-commons :per-iter-ms]))} |
323 | | - {:library "jdsp" |
324 | | - :time-per-fft (format "%.3f ms" (get-in bench-large-131k [:jdsp :per-iter-ms]))} |
325 | | - {:library "JTransforms" |
326 | | - :time-per-fft (format "%.3f ms" (get-in bench-large-131k [:jtransforms :per-iter-ms]))} |
327 | | - {:library "fastmath" |
328 | | - :time-per-fft (format "%.3f ms" (get-in bench-large-131k [:fastmath :per-iter-ms]))}]) |
| 312 | +(let [bench-large-131k (let [n 10] |
| 313 | + {:apache-commons (benchmark-fft fft-apache-commons signal-large n) |
| 314 | + :jdsp (benchmark-fft fft-jdsp signal-large n) |
| 315 | + :jtransforms (benchmark-fft fft-jtransforms signal-large n) |
| 316 | + :fastmath (benchmark-fft fft-fastmath signal-large n)})] |
| 317 | + (kind/table |
| 318 | + [{:library "Apache Commons Math" |
| 319 | + :time-per-fft (format "%.3f ms" (get-in bench-large-131k [:apache-commons :per-iter-ms]))} |
| 320 | + {:library "jdsp" |
| 321 | + :time-per-fft (format "%.3f ms" (get-in bench-large-131k [:jdsp :per-iter-ms]))} |
| 322 | + {:library "JTransforms" |
| 323 | + :time-per-fft (format "%.3f ms" (get-in bench-large-131k [:jtransforms :per-iter-ms]))} |
| 324 | + {:library "fastmath" |
| 325 | + :time-per-fft (format "%.3f ms" (get-in bench-large-131k [:fastmath :per-iter-ms]))}])) |
329 | 326 |
|
330 | 327 | ;; ## Understanding Parallelization Performance |
331 | 328 |
|
|
339 | 336 | 'org.jtransforms.utils.CommonUtils) |
340 | 337 |
|
341 | 338 | (defn benchmark-with-threads |
342 | | - "Benchmark FFT at specific thread count." |
343 | | - [n-threads signal n-iterations] |
344 | | - (ConcurrencyUtils/setNumberOfThreads n-threads) |
345 | | - (let [start (System/nanoTime) |
346 | | - _ (dotimes [_ n-iterations] |
347 | | - (fft-fastmath signal)) |
348 | | - end (System/nanoTime) |
349 | | - elapsed-ms (/ (- end start) 1e6)] |
350 | | - {:threads n-threads |
351 | | - :per-iter-ms (/ elapsed-ms n-iterations)})) |
| 339 | + "Benchmark FFT at specific thread count using criterium for statistical analysis." |
| 340 | + [n-threads signal] |
| 341 | + (let [previous-threads (ConcurrencyUtils/getNumberOfThreads)] |
| 342 | + (try |
| 343 | + (ConcurrencyUtils/setNumberOfThreads n-threads) |
| 344 | + ;; Use criterium's quick-benchmark* (the function behind quick-bench) for proper JVM warmup and statistics |
| 345 | + (let [result (crit/quick-benchmark* (fn [] (fft-fastmath signal)) {})] |
| 346 | + {:threads n-threads |
| 347 | + ;; Criterium returns [value (lower-ci upper-ci)] for each metric |
| 348 | + :mean-ms (* (first (:mean result)) 1e3) ; Convert seconds to milliseconds |
| 349 | + :variance-ms (* (first (:variance result)) 1e6) ; Variance in ms^2 |
| 350 | + :lower-q-ms (* (first (:lower-q result)) 1e3) |
| 351 | + :upper-q-ms (* (first (:upper-q result)) 1e3)}) |
| 352 | + (finally |
| 353 | + (ConcurrencyUtils/setNumberOfThreads previous-threads))))) |
352 | 354 |
|
353 | 355 | ; Test signals at different sizes (powers of 2) |
354 | 356 | (def test-signals |
355 | 357 | {:size-16k (generate-test-signal 16384) |
356 | 358 | :size-131k (generate-test-signal 131072) |
357 | 359 | :size-524k (generate-test-signal 524288)}) |
358 | 360 |
|
359 | | -(def thread-counts [1 2 4 8 16]) |
360 | | - |
361 | | -; Run comprehensive benchmark |
362 | | -(def thread-performance |
363 | | - (for [size-key [:size-16k :size-131k :size-524k] |
364 | | - n-threads thread-counts] |
365 | | - (let [sig (get test-signals size-key) |
366 | | - n-samples (count sig) |
367 | | - n-iterations (if (< n-samples 100000) 50 10) |
368 | | - result (benchmark-with-threads n-threads sig n-iterations)] |
369 | | - (assoc result |
370 | | - :signal-size (case size-key |
371 | | - :size-16k "16K (2^14)" |
372 | | - :size-131k "131K (2^17)" |
373 | | - :size-524k "524K (2^19)") |
374 | | - :n-samples n-samples)))) |
375 | | - |
376 | | -; Reset to system default |
377 | | -(ConcurrencyUtils/setNumberOfThreads (.availableProcessors (Runtime/getRuntime))) |
378 | | - |
379 | | -; Visualize results |
380 | | -(-> (tc/dataset thread-performance) |
381 | | - (plotly/base {:=x :threads |
382 | | - :=y :per-iter-ms |
383 | | - :=color :signal-size |
384 | | - :=title "FFT Performance vs Thread Count (fastmath/JTransforms)" |
385 | | - :=x-title "Number of Threads" |
386 | | - :=y-title "Time per FFT (ms)" |
387 | | - :=width 800 |
388 | | - :=height 500}) |
389 | | - (plotly/layer-point {:=mark-size 10}) |
390 | | - (plotly/layer-line {:=mark-opacity 0.6}) |
391 | | - plotly/plot) |
| 361 | +;; **Important limitation**: According to [Wendykier & Grote (2012)](https://www.math.emory.edu/technical-reports/techrep-00127.pdf), |
| 362 | +;; JTransforms 1D FFT runs on **at most 4 threads** (1, 2, or 4). The algorithm's decomposition |
| 363 | +;; strategy doesn't parallelize beyond this for one-dimensional transforms. |
| 364 | +;; (2D and 3D transforms can use more threads, but we're testing 1D here.) |
| 365 | +(def thread-counts [1 2 4]) |
| 366 | + |
| 367 | +; Run comprehensive benchmark with criterium |
| 368 | +; Note: This will take several minutes as criterium performs proper JVM warmup and statistical analysis |
| 369 | +(let [thread-performance (for [size-key [:size-16k :size-131k :size-524k] |
| 370 | + n-threads thread-counts] |
| 371 | + (let [sig (get test-signals size-key) |
| 372 | + n-samples (count sig) |
| 373 | + result (benchmark-with-threads n-threads sig)] |
| 374 | + (assoc result |
| 375 | + :signal-size (case size-key |
| 376 | + :size-16k "16K (2^14)" |
| 377 | + :size-131k "131K (2^17)" |
| 378 | + :size-524k "524K (2^19)") |
| 379 | + :n-samples n-samples)))] |
| 380 | + ; Visualize results |
| 381 | + (-> (tc/dataset thread-performance) |
| 382 | + (plotly/base {:=x :threads |
| 383 | + :=y :mean-ms |
| 384 | + :=color :signal-size |
| 385 | + :=title "FFT Performance vs Thread Count (fastmath/JTransforms)" |
| 386 | + :=x-title "Number of Threads" |
| 387 | + :=y-title "Mean Time per FFT (ms)" |
| 388 | + :=width 800 |
| 389 | + :=height 500}) |
| 390 | + (plotly/layer-point {:=mark-size 10}) |
| 391 | + (plotly/layer-line {:=mark-opacity 0.6}) |
| 392 | + plotly/plot)) |
392 | 393 |
|
393 | 394 | ;; ### Why Limited Speedup? |
394 | 395 |
|
|
0 commit comments