From ff614dbc5843f66147f15cdc7c485b8ae98d5038 Mon Sep 17 00:00:00 2001
From: Filipe Limberger <lipilimb07@gmail.com>
Date: Thu, 21 May 2026 15:17:34 +0000
Subject: [PATCH] interpolate: align IDW all-points and k-nearest paths on
 coincident inputs

The numba/CUDA all-points kernels used to `break` on the first input
point with d2==0 and return that point's z, while the scipy.cKDTree
k-nearest path averaged the z values of all coincident matches.  The
two backends thus produced numerically different results on the same
input -- a Cat 5 (backend inconsistency) accuracy bug.

Both all-points kernels now accumulate the sum and count of coincident
points and return the mean, matching the k-nearest path.  This is also
the mathematically correct IDW limit at a coincident grid cell.

Adds TestIDW.test_duplicate_points_average_across_backends as the
regression test.

Updates .claude/sweep-accuracy-state.csv with the interpolate row for
the deep-sweep accuracy run on 2026-05-21.
---
 .claude/sweep-accuracy-state.csv      |  1 +
 xrspatial/interpolate/_idw.py         | 37 +++++++++++++++++----------
 xrspatial/tests/test_interpolation.py | 29 +++++++++++++++++++++
 3 files changed, 53 insertions(+), 14 deletions(-)

diff --git a/.claude/sweep-accuracy-state.csv b/.claude/sweep-accuracy-state.csv
index 40432e971..19c40e9b4 100644
--- a/.claude/sweep-accuracy-state.csv
+++ b/.claude/sweep-accuracy-state.csv
@@ -16,6 +16,7 @@ geotiff,2026-05-15,1975,HIGH,1;2;5,"Pass 25 (2026-05-15): HIGH fixed -- issue #1
 glcm,2026-05-01,1408,HIGH,2,"angle=None averaged NaN as 0, masking no-valid-pairs as zero texture; fixed via nanmean-style averaging"
 hillshade,2026-04-10T12:00:00Z,,,,"Horn's method correct. All backends consistent. NaN propagation correct. float32 adequate for [0,1] output."
 hydro,2026-04-30,,LOW,1,Only LOW: twi log(0)=-inf if fa=0 (out-of-contract); MFD weighted sum no Kahan (negligible). No CRIT/HIGH issues.
+interpolate,2026-05-21,,HIGH,5,"HIGH backend divergence (Cat 5): IDW all-points (numba/cuda) used to break on the first d2==0 coincident input and return that point's z, while k-nearest (scipy.cKDTree) averaged all coincident matches. Fixed by averaging coincident matches in both kernels. LOW: kriging variance ~1e-15 negative at data points (no clamp); experimental_variogram digitize drops dists exactly at max_dist. cuda-unavailable"
 kde,2026-04-13T12:00:00Z,1198,,,kde/line_density return zeros for descending-y templates. Fix in PR #1199.
 mahalanobis,2026-05-01,,LOW,1,"LOW: np.linalg.inv (no pinv fallback) returns garbage for near-singular cov without raising. LOW: two-pass mean/cov instead of Welford could lose precision for inputs with very large mean/small variance. No CRIT/HIGH; all four backends use float64 throughout, NaN handled via isfinite, dist_sq clamped non-negative, singular case raises ValueError."
 morphology,2026-04-30,"1397,1399",HIGH,2;5,HIGH fixed in #1397/PR #1398: morph_erode/dilate seeded centre cell into running min/max even when kernel[centre]==0 (all 4 backends). HIGH fixed in #1399/PR #1400: dask backends raised on 1xN/Nx1 kernels because empty-slice writeback (0:-0).
diff --git a/xrspatial/interpolate/_idw.py b/xrspatial/interpolate/_idw.py
index de9aece4b..ad706fd8d 100644
--- a/xrspatial/interpolate/_idw.py
+++ b/xrspatial/interpolate/_idw.py
@@ -46,23 +46,30 @@ def _idw_cpu_allpoints(x_pts, y_pts, z_pts, n_pts,
             gy = y_grid[i]
             w_sum = 0.0
             wz_sum = 0.0
-            exact = False
-            exact_val = 0.0
+            # When one or more input points coincide with the grid cell
+            # (d2 == 0), the IDW weight 1/d^power is infinite.  The limit
+            # of IDW as the cell is approached from all directions is the
+            # mean of the coincident points' z values.  We track the sum
+            # and count of coincident matches and average them, so the
+            # all-points path agrees with the k-nearest path on duplicate
+            # input points.
+            exact_sum = 0.0
+            exact_count = 0
 
             for p in range(n_pts):
                 dx = gx - x_pts[p]
                 dy = gy - y_pts[p]
                 d2 = dx * dx + dy * dy
                 if d2 == 0.0:
-                    exact = True
-                    exact_val = z_pts[p]
-                    break
+                    exact_sum += z_pts[p]
+                    exact_count += 1
+                    continue
                 w = 1.0 / (d2 ** (power * 0.5))
                 w_sum += w
                 wz_sum += w * z_pts[p]
 
-            if exact:
-                out[i, j] = exact_val
+            if exact_count > 0:
+                out[i, j] = exact_sum / exact_count
             elif w_sum > 0.0:
                 out[i, j] = wz_sum / w_sum
             else:
@@ -130,23 +137,25 @@ def _idw_cuda_kernel(x_pts, y_pts, z_pts, n_pts,
         gy = y_grid[i]
         w_sum = 0.0
         wz_sum = 0.0
-        exact = False
-        exact_val = 0.0
+        # See _idw_cpu_allpoints: coincident input points (d2 == 0) are
+        # averaged so all-points and k-nearest paths agree on duplicates.
+        exact_sum = 0.0
+        exact_count = 0
 
         for p in range(n_pts):
             dx = gx - x_pts[p]
             dy = gy - y_pts[p]
             d2 = dx * dx + dy * dy
             if d2 == 0.0:
-                exact = True
-                exact_val = z_pts[p]
-                break
+                exact_sum += z_pts[p]
+                exact_count += 1
+                continue
             w = 1.0 / (d2 ** (power * 0.5))
             w_sum += w
             wz_sum += w * z_pts[p]
 
-        if exact:
-            out[i, j] = exact_val
+        if exact_count > 0:
+            out[i, j] = exact_sum / exact_count
         elif w_sum > 0.0:
             out[i, j] = wz_sum / w_sum
         else:
diff --git a/xrspatial/tests/test_interpolation.py b/xrspatial/tests/test_interpolation.py
index d0d54f5e4..e7bbd66cd 100644
--- a/xrspatial/tests/test_interpolation.py
+++ b/xrspatial/tests/test_interpolation.py
@@ -132,6 +132,35 @@ def test_output_metadata(self):
         np.testing.assert_array_equal(result.coords['x'].values,
                                       template.coords['x'].values)
 
+    def test_duplicate_points_average_across_backends(self):
+        """Coincident input points are averaged in both all-points and
+        k-nearest paths.
+
+        Regression test: the all-points (numba/cuda) kernel used to break
+        on the first ``d2 == 0`` match and return that point's z, while
+        the k-nearest (scipy.cKDTree) path averaged all coincident
+        matches.  Both paths now average, so they agree on duplicates.
+        """
+        x = np.array([0.0, 0.0, 1.0])
+        y = np.array([0.0, 0.0, 0.0])
+        z = np.array([10.0, 20.0, 30.0])
+
+        template = _make_template([0.0], [0.0, 0.5, 1.0])
+
+        # At (0, 0): two coincident inputs with z=10, z=20 -> mean=15.
+        # At (0.5, 0): no coincident input, normal IDW.
+        # At (1, 0): coincident with the third input, z=30.
+        all_points = idw(x, y, z, template, power=2.0).values
+        k_nearest = idw(x, y, z, template, power=2.0, k=3).values
+
+        np.testing.assert_allclose(all_points[0, 0], 15.0)
+        np.testing.assert_allclose(k_nearest[0, 0], 15.0)
+        np.testing.assert_allclose(all_points[0, 2], 30.0)
+        np.testing.assert_allclose(k_nearest[0, 2], 30.0)
+        # Both paths should also match on the non-coincident grid point.
+        np.testing.assert_allclose(
+            all_points[0, 1], k_nearest[0, 1], rtol=1e-10)
+
     @dask_array_available
     def test_dask_matches_numpy(self):
         x, y, z = _grid_points()