Skip to content

Commit 1faa055

Browse files
committed
fft comparison wip
1 parent a3cac0b commit 1faa055

1 file changed

Lines changed: 77 additions & 76 deletions

File tree

src/dsp/fft_comparison.clj

Lines changed: 77 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@
1313
[tech.v3.datatype :as dtype]
1414
[tech.v3.datatype.functional :as dfn]
1515
[tablecloth.api :as tc]
16-
[scicloj.tableplot.v1.plotly :as plotly])
16+
[scicloj.tableplot.v1.plotly :as plotly]
17+
[criterium.core :as crit])
1718
(:import [org.apache.commons.math3.transform FastFourierTransformer
1819
DftNormalization
1920
TransformType]
@@ -289,43 +290,39 @@
289290

290291
;; Small signal (128 samples):
291292

292-
(def bench-small-128
293-
(let [n 1000]
294-
{:apache-commons (benchmark-fft fft-apache-commons signal n)
295-
:jdsp (benchmark-fft fft-jdsp signal n)
296-
:jtransforms (benchmark-fft fft-jtransforms signal n)
297-
:fastmath (benchmark-fft fft-fastmath signal n)}))
298-
299-
(kind/table
300-
[{:library "Apache Commons Math"
301-
:time-per-fft (format "%.3f ms" (get-in bench-small-128 [:apache-commons :per-iter-ms]))}
302-
{:library "jdsp"
303-
:time-per-fft (format "%.3f ms" (get-in bench-small-128 [:jdsp :per-iter-ms]))}
304-
{:library "JTransforms"
305-
:time-per-fft (format "%.3f ms" (get-in bench-small-128 [:jtransforms :per-iter-ms]))}
306-
{:library "fastmath"
307-
:time-per-fft (format "%.3f ms" (get-in bench-small-128 [:fastmath :per-iter-ms]))}])
293+
(let [bench-small-128 (let [n 1000]
294+
{:apache-commons (benchmark-fft fft-apache-commons signal n)
295+
:jdsp (benchmark-fft fft-jdsp signal n)
296+
:jtransforms (benchmark-fft fft-jtransforms signal n)
297+
:fastmath (benchmark-fft fft-fastmath signal n)})]
298+
(kind/table
299+
[{:library "Apache Commons Math"
300+
:time-per-fft (format "%.3f ms" (get-in bench-small-128 [:apache-commons :per-iter-ms]))}
301+
{:library "jdsp"
302+
:time-per-fft (format "%.3f ms" (get-in bench-small-128 [:jdsp :per-iter-ms]))}
303+
{:library "JTransforms"
304+
:time-per-fft (format "%.3f ms" (get-in bench-small-128 [:jtransforms :per-iter-ms]))}
305+
{:library "fastmath"
306+
:time-per-fft (format "%.3f ms" (get-in bench-small-128 [:fastmath :per-iter-ms]))}]))
308307

309308
;; Larger signal (2^17 = 131,072 samples):
310309

311310
(def signal-large (generate-test-signal 131072))
312311

313-
(def bench-large-131k
314-
(let [n 10]
315-
{:apache-commons (benchmark-fft fft-apache-commons signal-large n)
316-
:jdsp (benchmark-fft fft-jdsp signal-large n)
317-
:jtransforms (benchmark-fft fft-jtransforms signal-large n)
318-
:fastmath (benchmark-fft fft-fastmath signal-large n)}))
319-
320-
(kind/table
321-
[{:library "Apache Commons Math"
322-
:time-per-fft (format "%.3f ms" (get-in bench-large-131k [:apache-commons :per-iter-ms]))}
323-
{:library "jdsp"
324-
:time-per-fft (format "%.3f ms" (get-in bench-large-131k [:jdsp :per-iter-ms]))}
325-
{:library "JTransforms"
326-
:time-per-fft (format "%.3f ms" (get-in bench-large-131k [:jtransforms :per-iter-ms]))}
327-
{:library "fastmath"
328-
:time-per-fft (format "%.3f ms" (get-in bench-large-131k [:fastmath :per-iter-ms]))}])
312+
(let [bench-large-131k (let [n 10]
313+
{:apache-commons (benchmark-fft fft-apache-commons signal-large n)
314+
:jdsp (benchmark-fft fft-jdsp signal-large n)
315+
:jtransforms (benchmark-fft fft-jtransforms signal-large n)
316+
:fastmath (benchmark-fft fft-fastmath signal-large n)})]
317+
(kind/table
318+
[{:library "Apache Commons Math"
319+
:time-per-fft (format "%.3f ms" (get-in bench-large-131k [:apache-commons :per-iter-ms]))}
320+
{:library "jdsp"
321+
:time-per-fft (format "%.3f ms" (get-in bench-large-131k [:jdsp :per-iter-ms]))}
322+
{:library "JTransforms"
323+
:time-per-fft (format "%.3f ms" (get-in bench-large-131k [:jtransforms :per-iter-ms]))}
324+
{:library "fastmath"
325+
:time-per-fft (format "%.3f ms" (get-in bench-large-131k [:fastmath :per-iter-ms]))}]))
329326

330327
;; ## Understanding Parallelization Performance
331328

@@ -339,56 +336,60 @@
339336
'org.jtransforms.utils.CommonUtils)
340337

341338
(defn benchmark-with-threads
342-
"Benchmark FFT at specific thread count."
343-
[n-threads signal n-iterations]
344-
(ConcurrencyUtils/setNumberOfThreads n-threads)
345-
(let [start (System/nanoTime)
346-
_ (dotimes [_ n-iterations]
347-
(fft-fastmath signal))
348-
end (System/nanoTime)
349-
elapsed-ms (/ (- end start) 1e6)]
350-
{:threads n-threads
351-
:per-iter-ms (/ elapsed-ms n-iterations)}))
339+
"Benchmark FFT at specific thread count using criterium for statistical analysis."
340+
[n-threads signal]
341+
(let [previous-threads (ConcurrencyUtils/getNumberOfThreads)]
342+
(try
343+
(ConcurrencyUtils/setNumberOfThreads n-threads)
344+
;; Use criterium's quick-benchmark* for proper JVM warmup and statistics
345+
(let [result (crit/quick-benchmark* (fn [] (fft-fastmath signal)) {})]
346+
{:threads n-threads
347+
;; Criterium returns [value (lower-ci upper-ci)] for each metric
348+
:mean-ms (* (first (:mean result)) 1e3) ; Convert seconds to milliseconds
349+
:variance-ms (* (first (:variance result)) 1e6) ; Variance in ms^2
350+
:lower-q-ms (* (first (:lower-q result)) 1e3)
351+
:upper-q-ms (* (first (:upper-q result)) 1e3)})
352+
(finally
353+
(ConcurrencyUtils/setNumberOfThreads previous-threads)))))
352354

353355
; Test signals at different sizes (powers of 2)
354356
(def test-signals
355357
{:size-16k (generate-test-signal 16384)
356358
:size-131k (generate-test-signal 131072)
357359
:size-524k (generate-test-signal 524288)})
358360

359-
(def thread-counts [1 2 4 8 16])
360-
361-
; Run comprehensive benchmark
362-
(def thread-performance
363-
(for [size-key [:size-16k :size-131k :size-524k]
364-
n-threads thread-counts]
365-
(let [sig (get test-signals size-key)
366-
n-samples (count sig)
367-
n-iterations (if (< n-samples 100000) 50 10)
368-
result (benchmark-with-threads n-threads sig n-iterations)]
369-
(assoc result
370-
:signal-size (case size-key
371-
:size-16k "16K (2^14)"
372-
:size-131k "131K (2^17)"
373-
:size-524k "524K (2^19)")
374-
:n-samples n-samples))))
375-
376-
; Reset to system default
377-
(ConcurrencyUtils/setNumberOfThreads (.availableProcessors (Runtime/getRuntime)))
378-
379-
; Visualize results
380-
(-> (tc/dataset thread-performance)
381-
(plotly/base {:=x :threads
382-
:=y :per-iter-ms
383-
:=color :signal-size
384-
:=title "FFT Performance vs Thread Count (fastmath/JTransforms)"
385-
:=x-title "Number of Threads"
386-
:=y-title "Time per FFT (ms)"
387-
:=width 800
388-
:=height 500})
389-
(plotly/layer-point {:=mark-size 10})
390-
(plotly/layer-line {:=mark-opacity 0.6})
391-
plotly/plot)
361+
;; **Important limitation**: According to [Wendykier & Grote (2012)](https://www.math.emory.edu/technical-reports/techrep-00127.pdf),
362+
;; the JTransforms 1D FFT runs on **at most 4 threads** (either 2 or 4). The algorithm's decomposition
363+
;; strategy doesn't parallelize beyond this for one-dimensional transforms.
364+
;; (2D and 3D transforms can use more threads, but we're testing 1D here.)
365+
(def thread-counts [1 2 4])
366+
367+
; Run comprehensive benchmark with criterium
368+
; Note: This will take several minutes as criterium performs proper JVM warmup and statistical analysis
369+
(let [thread-performance (for [size-key [:size-16k :size-131k :size-524k]
370+
n-threads thread-counts]
371+
(let [sig (get test-signals size-key)
372+
n-samples (count sig)
373+
result (benchmark-with-threads n-threads sig)]
374+
(assoc result
375+
:signal-size (case size-key
376+
:size-16k "16K (2^14)"
377+
:size-131k "131K (2^17)"
378+
:size-524k "524K (2^19)")
379+
:n-samples n-samples)))]
380+
; Visualize results
381+
(-> (tc/dataset thread-performance)
382+
(plotly/base {:=x :threads
383+
:=y :mean-ms
384+
:=color :signal-size
385+
:=title "FFT Performance vs Thread Count (fastmath/JTransforms)"
386+
:=x-title "Number of Threads"
387+
:=y-title "Mean Time per FFT (ms)"
388+
:=width 800
389+
:=height 500})
390+
(plotly/layer-point {:=mark-size 10})
391+
(plotly/layer-line {:=mark-opacity 0.6})
392+
plotly/plot))
392393

393394
;; ### Why Limited Speedup?
394395

0 commit comments

Comments
 (0)