1616 [tech.v3.datatype :as dtype]
1717 [tech.v3.datatype.functional :as dfn]
1818 [tech.v3.datatype.statistics :as stats]
19+ [tech.v3.datatype.convolve :as convolve]
1920 [tech.v3.tensor :as tensor]
2021 [tech.v3.libs.buffered-image :as bufimg]
2122 [tech.v3.dataset.tensor :as ds-tensor]
@@ -1150,28 +1151,27 @@ edges
11501151
11511152; ; So far we've used simple element-wise operations and direct pixel comparisons.
11521153; ; Now let's explore **convolution**, the fundamental operation behind blur, sharpen,
1153- ; ; and sophisticated edge detection.
1154+ ; ; and edge detection.
11541155; ;
1155- ; ; We'll build a reusable convolution engine using `tensor/compute-tensor` for
1156- ; ; windowed operations, then apply various kernels to see the dramatic effects.
1156+ ; ; **What we'll learn:**
1157+ ; ; - How convolution works (sliding kernels over images)
1158+ ; ; - Building a 2D convolution function for learning and non-separable filters
1159+ ; ; - Separable filters—the standard approach for Gaussian blur
1160+ ; ; - Practical applications: box blur, Gaussian blur, sharpening, edge detection
11571161
11581162; ; ## Understanding Convolution
11591163
11601164; ; [Convolution](https://en.wikipedia.org/wiki/Kernel_(image_processing)) is a
11611165; ; fundamental operation in image processing. A **kernel** (or filter) is a small
11621166; ; matrix that slides over the image. At each position, we multiply kernel values
11631167; ; by corresponding pixel values and sum the result.
1168+ ; ;
1169+ ; ; Let's see this with a simple example: box blur.
11641170
1165- ; ; Example: 3×3 box blur kernel (all pixels weighted equally):
1166- ; ; ```
1167- ; ; [1/9 1/9 1/9]
1168- ; ; [1/9 1/9 1/9]
1169- ; ; [1/9 1/9 1/9]
1170- ; ; ```
1171-
1172- ; ; ## Box Blur Kernel
1171+ ; ; ### Box Blur Example
11731172
1174- ; ; Create a simple averaging kernel:
1173+ ; ; Box blur averages all pixels in a neighborhood. A 3×3 box blur kernel weights
1174+ ; ; all 9 pixels equally:
11751175
11761176(defn box-blur-kernel
11771177 " Create NxN box blur kernel (uniform averaging).
@@ -1188,111 +1188,217 @@ edges
11881188
11891189kernel-3x3
11901190
1191- ; ; ## Applying Convolution
1191+ ; ; This kernel says: "Replace each pixel with the average of itself and its 8 neighbors."
1192+ ; ; To apply this kernel across the entire image, we need a convolution function.
11921193
1193- ; ; Slide the kernel over the image, computing weighted sums:
1194+ ; ; ## Building a 2D Convolution Function
1195+
1196+ ; ; We'll implement `convolve-2d` to understand the mechanics. This function is useful for:
1197+ ; ; - **Learning**: See exactly how convolution works
1198+ ; ; - **Non-separable filters**: Some kernels (like Sobel) can't be separated into 1D operations
1199+ ; ;
1200+ ; ; For separable filters like Gaussian blur, we'll use a more efficient approach (shown next).
11941201
11951202(defn convolve-2d
11961203 " Apply 2D convolution to grayscale image [H W].
11971204 kernel: [kh kw] float tensor
1198- Returns [H W] float32 tensor (zero-padded edges).
1205+ edge-mode: :zero (default) or :reflect
1206+ Returns [H W] float32 tensor.
1207+
1208+ This implementation prioritizes clarity over performance to demonstrate
1209+ tensor operations and convolution mechanics. It explicitly shows:
1210+ - Sliding window iteration with loop/recur
1211+ - Bounds checking and edge handling
1212+ - Element-wise kernel multiplication
1213+
1214+ This function is useful for learning and for non-separable kernels.
1215+ For separable filters like Gaussian blur, see gaussian-blur-separable below."
1216+ ([img-2d kernel] (convolve-2d img-2d kernel :zero ))
1217+ ([img-2d kernel edge-mode]
1218+ (let [[h w] (dtype/shape img-2d)
1219+ [kh kw] (dtype/shape kernel)
1220+ pad-h (quot kh 2 )
1221+ pad-w (quot kw 2 )
1222+
1223+ ; ; Helper to get pixel value with edge handling
1224+ get-pixel (case edge-mode
1225+ :zero (fn [y x]
1226+ (if (and (>= y 0 ) (< y h)
1227+ (>= x 0 ) (< x w))
1228+ (img-2d y x)
1229+ 0.0 ))
1230+ :reflect (fn [y x]
1231+ (let [ry (cond
1232+ (< y 0 ) (- y)
1233+ (>= y h) (- h (- y h) 2 )
1234+ :else y)
1235+ rx (cond
1236+ (< x 0 ) (- x)
1237+ (>= x w) (- w (- x w) 2 )
1238+ :else x)]
1239+ (img-2d (max 0 (min (dec h) ry))
1240+ (max 0 (min (dec w) rx))))))]
1241+ (tensor/compute-tensor
1242+ [h w]
1243+ (fn [y x]
1244+ ; ; Sum weighted pixel values in kernel neighborhood
1245+ (loop [ky 0
1246+ kx 0
1247+ sum 0.0 ]
1248+ (if (>= ky kh)
1249+ sum
1250+ (let [img-y (+ y ky (- pad-h))
1251+ img-x (+ x kx (- pad-w))
1252+ pixel-val (get-pixel img-y img-x)
1253+ new-sum (+ sum (* (kernel ky kx) pixel-val))
1254+ [next-ky next-kx] (if (>= (inc kx) kw)
1255+ [(inc ky) 0 ]
1256+ [ky (inc kx)])]
1257+ (recur next-ky next-kx new-sum)))))
1258+ :float32 ))))
1259+
1260+ ; ; ## Separable Filters: The Recommended Approach
1261+
1262+ ; ; Many important filters are [separable](https://en.wikipedia.org/wiki/Separable_filter)—they
1263+ ; ; can be decomposed into two 1D operations instead of one 2D operation. This is the
1264+ ; ; **standard approach** for filters like Gaussian blur, not an optimization.
1265+ ; ;
1266+ ; ; **How it works**: Instead of applying a k×k kernel to each pixel, we:
1267+ ; ; 1. Apply a 1D kernel horizontally (blur each row)
1268+ ; ; 2. Apply a 1D kernel vertically (blur each column)
1269+ ; ;
1270+ ; ; **Computational advantage**:
1271+ ; ; - 2D convolution with k×k kernel: O(k²) operations per pixel
1272+ ; ; - Separable approach: O(k) operations per pixel (k/2 horizontal + k/2 vertical)
1273+ ; ; - For a 7×7 kernel: 49 vs 7 operations per pixel (~7× reduction)
1274+ ; ;
1275+ ; ; Where:
1276+ ; ; - k = kernel size (e.g., 7 for a 7×7 kernel)
1277+ ; ; - N×M = image dimensions (height × width)
1278+ ; ; - Total work for image: O(N×M×k²) vs O(N×M×k)
1279+ ; ;
1280+ ; ; **Additional optimizations**: Library functions like `convolve/gaussian1d` may use FFT
1281+ ; ; (Fast Fourier Transform) for very large kernels or data, which can be even faster:
1282+ ; ; O(N×M×log(N×M)) regardless of kernel size. This happens automatically based on data size.
1283+
1284+ ; ; ### The convolve Namespace
1285+
1286+ (require '[tech.v3.datatype.convolve :as convolve])
1287+
1288+ ; ; The [`tech.v3.datatype.convolve`](https://cnuernber.github.io/dtype-next/tech.v3.datatype.convolve.html)
1289+ ; ; namespace provides optimized 1D convolution operations:
1290+ ; ;
1291+ ; ; - `convolve/gaussian1d` — 1D Gaussian filter with automatic kernel generation
1292+ ; ; - `convolve/convolve1d` — General 1D convolution
1293+ ; ; - `convolve/gauss-kernel-1d` — Create 1D Gaussian kernels
1294+ ; ;
1295+ ; ; These functions support edge-handling strategies (`:zero`, `:clamp`, `:reflect`, `:wrap`)
1296+ ; ; and may use FFT optimization automatically for large data.
1297+
1298+ (defn gaussian-blur-separable
1299+ " Apply Gaussian blur using separable 1D convolutions.
1300+ Takes: [H W] grayscale tensor, sigma (standard deviation)
1301+ Returns: [H W] float64 tensor
11991302
1200- Note: This implementation prioritizes clarity over performance.
1201- For production use, consider tech.v3.libs.opencv or specialized
1202- convolution libraries for better performance on large images."
1203- [img-2d kernel]
1303+ This is the recommended approach for Gaussian blur.
1304+ Applies horizontal blur to each row, then vertical blur to each column."
1305+ [img-2d sigma]
12041306 (let [[h w] (dtype/shape img-2d)
1205- [kh kw] (dtype/shape kernel)
1206- pad-h (quot kh 2 )
1207- pad-w (quot kw 2 )]
1307+
1308+ ; ; Step 1: Blur each row horizontally
1309+ ; ; tensor/slice gives us a reader of rows (zero-copy)
1310+ rows (tensor/slice img-2d 1 )
1311+
1312+ ; ; dtype/emap applies gaussian1d to each row (lazy)
1313+ rows-blurred (dtype/emap
1314+ (fn [row]
1315+ (convolve/gaussian1d row sigma {:mode :same :edge-mode :reflect }))
1316+ :object
1317+ rows)
1318+
1319+ ; ; Concatenate blurred rows into single buffer, then reshape to [h w]
1320+ horizontal-blurred (tensor/reshape
1321+ (dtype/concat-buffers :float64 rows-blurred)
1322+ [h w])
1323+
1324+ ; ; Step 2: Blur each column vertically
1325+ ; ; tensor/slice-right gives us a reader of columns (zero-copy)
1326+ cols (tensor/slice-right horizontal-blurred 1 )
1327+
1328+ cols-blurred (dtype/emap
1329+ (fn [col]
1330+ (convolve/gaussian1d col sigma {:mode :same :edge-mode :reflect }))
1331+ :object
1332+ cols)]
1333+
1334+ ; ; Reassemble columns (requires transposition, so use compute-tensor)
12081335 (tensor/compute-tensor
12091336 [h w]
12101337 (fn [y x]
1211- ; ; Sum weighted pixel values in kernel neighborhood
1212- (loop [ky 0
1213- kx 0
1214- sum 0.0 ]
1215- (if (>= ky kh)
1216- sum
1217- (let [img-y (+ y ky (- pad-h))
1218- img-x (+ x kx (- pad-w))
1219- in-bounds? (and (>= img-y 0 ) (< img-y h)
1220- (>= img-x 0 ) (< img-x w))
1221- new-sum (if in-bounds?
1222- (+ sum (* (kernel ky kx)
1223- (img-2d img-y img-x)))
1224- sum)
1225- [next-ky next-kx] (if (>= (inc kx) kw)
1226- [(inc ky) 0 ]
1227- [ky (inc kx)])]
1228- (recur next-ky next-kx new-sum)))))
1229- :float32 )))
1338+ ((nth cols-blurred x) y))
1339+ :float64 )))
12301340
1231- ; ; **Apply box blur to grayscale**:
1232-
1233- (def blurred-gray (convolve-2d grayscale kernel-3x3))
1234-
1235- ; ; Compare original vs box blur:
1341+ ; ; **Key dtype-next patterns in this implementation:**
1342+ ; ;
1343+ ; ; - `tensor/slice` and `tensor/slice-right` — Zero-copy readers of rows/columns
1344+ ; ; - `dtype/emap` — Lazy transformation over readers (more efficient than `mapv`)
1345+ ; ; - `dtype/concat-buffers` — Efficiently concatenate buffers for reassembly
1346+ ; ; - `tensor/reshape` — Zero-copy view with different shape
1347+ ; ; - `tensor/compute-tensor` — Used when data needs reordering (columns → rows)
1348+ ; ;
1349+ ; ; The horizontal pass uses `concat-buffers` + `reshape` (fast, sequential data).
1350+ ; ; The vertical pass uses `compute-tensor` (necessary for transposition).
12361351
1237- (kind/table
1238- [[:original :box-blur-3x3 ]
1239- [(bufimg/tensor->image grayscale)
1240- (bufimg/tensor->image (dtype/elemwise-cast blurred-gray :uint8 ))]])
1352+ ; ; The `convolve/gaussian1d` function handles kernel generation, edge modes (`:reflect`,
1353+ ; ; `:wrap`, `:zero`), and may use FFT optimization automatically for large data.
12411354
1242- ; ; Grayscale tensors (2D) are automatically rendered as grayscale images.
1355+ ; ; ---
12431356
1244- ; ; ## Gaussian Blur
1357+ ; ; With separable filters understood, let's apply convolution to practical filtering tasks.
12451358
1246- ; ; [Gaussian blur](https://en.wikipedia.org/wiki/Gaussian_blur) uses a kernel based
1247- ; ; on the Gaussian (normal) distribution. It weights center pixels more heavily than
1248- ; ; edge pixels, producing a smooth, natural-looking blur without artifacts.
1359+ ; ; ## Applying Box Blur
12491360
1250- (defn gaussian-kernel
1251- " Create NxN Gaussian kernel with given sigma.
1252- Takes: n (kernel size), sigma (standard deviation)
1253- Returns: [n n] float32 tensor (normalized to sum=1)"
1254- [n sigma]
1255- (let [center (quot n 2 )]
1256- (-> (tensor/compute-tensor
1257- [n n]
1258- (fn [y x]
1259- (let [dy (- y center)
1260- dx (- x center)
1261- dist-sq (+ (* dy dy) (* dx dx))]
1262- (Math/exp (/ (- dist-sq) (* 2 sigma sigma)))))
1263- :float32 )
1264- ; ; Normalize so weights sum to 1
1265- (as-> t (dfn// t (dfn/sum t))))))
1361+ ; ; Now we can apply our 3×3 box blur kernel using `convolve-2d`:
12661362
1267- (def gaussian-5x5 ( gaussian-kernel 5 1.0 ))
1363+ (def blurred-gray ( convolve-2d grayscale kernel-3x3 ))
12681364
1269- gaussian-5x5
1365+ ; ; Compare original vs blurred:
12701366
1271- ; ; Apply Gaussian blur:
1367+ (kind/table
1368+ [[:original :box-blur-3x3 ]
1369+ [(bufimg/tensor->image grayscale)
1370+ (bufimg/tensor->image (dtype/elemwise-cast blurred-gray :uint8 ))]])
12721371
1273- (def gaussian-blurred (convolve-2d grayscale gaussian-5x5))
1372+ ; ; Box blur creates a simple averaging effect. Grayscale tensors (2D) are automatically
1373+ ; ; rendered as grayscale images.
12741374
1275- ; ; Larger Gaussian blur for stronger effect:
1375+ ; ; ## Gaussian Blur (Separable)
12761376
1277- (def gaussian-9x9 (gaussian-kernel 15 3.0 ))
1377+ ; ; [Gaussian blur](https://en.wikipedia.org/wiki/Gaussian_blur) weights center pixels
1378+ ; ; more heavily than edge pixels based on the Gaussian (normal) distribution, producing
1379+ ; ; smooth, natural-looking blur without artifacts.
1380+ ; ;
1381+ ; ; We use `gaussian-blur-separable` (the recommended approach):
12781382
1279- (def gaussian-blurred-large (convolve-2d grayscale gaussian-9x9))
1383+ (def gaussian-blurred-1 (gaussian-blur-separable grayscale 1.0 ))
1384+ (def gaussian-blurred-1-5 (gaussian-blur-separable grayscale 1.5 ))
12801385
1281- ; ; Compare different blur kernels :
1386+ ; ; Compare blur strengths—notice how Gaussian blur is smoother than box blur :
12821387
12831388(kind/table
1284- [[:original :box-blur-3x3 :gaussian-5x5 :gaussian-9x9 ]
1389+ [[:original :box-blur-3x3 :gaussian-sigma-1.0 :gaussian-sigma-1.5 ]
12851390 [(bufimg/tensor->image grayscale)
12861391 (bufimg/tensor->image (dtype/elemwise-cast blurred-gray :uint8 ))
1287- (bufimg/tensor->image ( dtype/elemwise-cast gaussian-blurred :uint8 ) )
1288- (bufimg/tensor->image ( dtype/elemwise-cast gaussian-blurred-large :uint8 ) )]])
1392+ (bufimg/tensor->image gaussian-blurred-1 )
1393+ (bufimg/tensor->image gaussian-blurred-1-5 )]])
12891394
1290- ; ; ## Sharpen Filter
1395+ ; ; ## Sharpening (Unsharp Masking)
12911396
12921397; ; [Unsharp masking](https://en.wikipedia.org/wiki/Unsharp_masking) sharpens images
12931398; ; by enhancing edges. We subtract a blurred version from the original to extract
1294- ; ; high-frequency details, then add them back amplified.
1295- ; ; Method: original + strength × (original - blur)
1399+ ; ; high-frequency details, then add them back amplified:
1400+ ; ;
1401+ ; ; **Formula**: `sharpened = original + strength × (original - blur)`
12961402
12971403(defn sharpen
12981404 " Sharpen image using unsharp mask.
@@ -1314,16 +1420,15 @@ gaussian-5x5
13141420 [(bufimg/tensor->image grayscale)
13151421 (bufimg/tensor->image (dtype/elemwise-cast sharpened-gray :uint8 ))]])
13161422
1317- ; ; ## Sharpness comparison
1423+ ; ; ### Quantifying Sharpness
13181424
1319- ; ; We can quantify the sharpness of each filter using our `sharpness-score` function.
1320- ; ; However, note that `sharpness-score` expects BGR input, so we need to adapt it for
1321- ; ; grayscale tensors. For simplicity, we'll compute sharpness inline here:
1425+ ; ; We can measure the effect of each filter by computing mean edge magnitude.
1426+ ; ; Higher values = sharper images, lower values = blurrier:
13221427
13231428(-> {:original grayscale
1324- :box-3x3 blurred-gray
1325- :gaussian-5x5 gaussian-blurred
1326- :gaussian-9x9 gaussian-blurred-large
1429+ :box-blur- 3x3 blurred-gray
1430+ :gaussian-sigma-1.0 gaussian-blurred-1
1431+ :gaussian-sigma-1.5 gaussian-blurred-1-5
13271432 :sharpened sharpened-gray}
13281433 (update-vals
13291434 (fn [t]
@@ -1332,6 +1437,8 @@ gaussian-5x5
13321437 (gradient-y t)))))
13331438 tc/dataset)
13341439
1440+ ; ; As expected: sharpening increases edge magnitude, blurring decreases it.
1441+
13351442; ; ## Sobel Edge Detection
13361443
13371444; ; The [Sobel operator](https://en.wikipedia.org/wiki/Sobel_operator) is a classic
0 commit comments