From ff614dbc5843f66147f15cdc7c485b8ae98d5038 Mon Sep 17 00:00:00 2001 From: Filipe Limberger Date: Thu, 21 May 2026 15:17:34 +0000 Subject: [PATCH] interpolate: align IDW all-points and k-nearest paths on coincident inputs The numba/CUDA all-points kernels used to `break` on the first input point with d2==0 and return that point's z, while the scipy.cKDTree k-nearest path averaged the z values of all coincident matches. The two backends thus produced numerically different results on the same input -- a Cat 5 (backend inconsistency) accuracy bug. Both all-points kernels now accumulate the sum and count of coincident points and return the mean, matching the k-nearest path. This is also the mathematically correct IDW limit at a coincident grid cell. Adds TestIDW.test_duplicate_points_average_across_backends as the regression test. Updates .claude/sweep-accuracy-state.csv with the interpolate row for the deep-sweep accuracy run on 2026-05-21. --- .claude/sweep-accuracy-state.csv | 1 + xrspatial/interpolate/_idw.py | 37 +++++++++++++++++---------- xrspatial/tests/test_interpolation.py | 29 +++++++++++++++++++++ 3 files changed, 53 insertions(+), 14 deletions(-) diff --git a/.claude/sweep-accuracy-state.csv b/.claude/sweep-accuracy-state.csv index 40432e971..19c40e9b4 100644 --- a/.claude/sweep-accuracy-state.csv +++ b/.claude/sweep-accuracy-state.csv @@ -16,6 +16,7 @@ geotiff,2026-05-15,1975,HIGH,1;2;5,"Pass 25 (2026-05-15): HIGH fixed -- issue #1 glcm,2026-05-01,1408,HIGH,2,"angle=None averaged NaN as 0, masking no-valid-pairs as zero texture; fixed via nanmean-style averaging" hillshade,2026-04-10T12:00:00Z,,,,"Horn's method correct. All backends consistent. NaN propagation correct. float32 adequate for [0,1] output." hydro,2026-04-30,,LOW,1,Only LOW: twi log(0)=-inf if fa=0 (out-of-contract); MFD weighted sum no Kahan (negligible). No CRIT/HIGH issues. +interpolate,2026-05-21,,HIGH,5,"HIGH backend divergence (Cat 5): IDW all-points (numba/cuda) used to break on the first d2==0 coincident input and return that point's z, while k-nearest (scipy.cKDTree) averaged all coincident matches. Fixed by averaging coincident matches in both kernels. LOW: kriging variance ~1e-15 negative at data points (no clamp); experimental_variogram digitize drops dists exactly at max_dist. cuda-unavailable" kde,2026-04-13T12:00:00Z,1198,,,kde/line_density return zeros for descending-y templates. Fix in PR #1199. mahalanobis,2026-05-01,,LOW,1,"LOW: np.linalg.inv (no pinv fallback) returns garbage for near-singular cov without raising. LOW: two-pass mean/cov instead of Welford could lose precision for inputs with very large mean/small variance. No CRIT/HIGH; all four backends use float64 throughout, NaN handled via isfinite, dist_sq clamped non-negative, singular case raises ValueError." morphology,2026-04-30,"1397,1399",HIGH,2;5,HIGH fixed in #1397/PR #1398: morph_erode/dilate seeded centre cell into running min/max even when kernel[centre]==0 (all 4 backends). HIGH fixed in #1399/PR #1400: dask backends raised on 1xN/Nx1 kernels because empty-slice writeback (0:-0). diff --git a/xrspatial/interpolate/_idw.py b/xrspatial/interpolate/_idw.py index de9aece4b..ad706fd8d 100644 --- a/xrspatial/interpolate/_idw.py +++ b/xrspatial/interpolate/_idw.py @@ -46,23 +46,30 @@ def _idw_cpu_allpoints(x_pts, y_pts, z_pts, n_pts, gy = y_grid[i] w_sum = 0.0 wz_sum = 0.0 - exact = False - exact_val = 0.0 + # When one or more input points coincide with the grid cell + # (d2 == 0), the IDW weight 1/d^power is infinite. The limit + # of IDW as the cell is approached from all directions is the + # mean of the coincident points' z values. We track the sum + # and count of coincident matches and average them, so the + # all-points path agrees with the k-nearest path on duplicate + # input points. + exact_sum = 0.0 + exact_count = 0 for p in range(n_pts): dx = gx - x_pts[p] dy = gy - y_pts[p] d2 = dx * dx + dy * dy if d2 == 0.0: - exact = True - exact_val = z_pts[p] - break + exact_sum += z_pts[p] + exact_count += 1 + continue w = 1.0 / (d2 ** (power * 0.5)) w_sum += w wz_sum += w * z_pts[p] - if exact: - out[i, j] = exact_val + if exact_count > 0: + out[i, j] = exact_sum / exact_count elif w_sum > 0.0: out[i, j] = wz_sum / w_sum else: @@ -130,23 +137,25 @@ def _idw_cuda_kernel(x_pts, y_pts, z_pts, n_pts, gy = y_grid[i] w_sum = 0.0 wz_sum = 0.0 - exact = False - exact_val = 0.0 + # See _idw_cpu_allpoints: coincident input points (d2 == 0) are + # averaged so all-points and k-nearest paths agree on duplicates. + exact_sum = 0.0 + exact_count = 0 for p in range(n_pts): dx = gx - x_pts[p] dy = gy - y_pts[p] d2 = dx * dx + dy * dy if d2 == 0.0: - exact = True - exact_val = z_pts[p] - break + exact_sum += z_pts[p] + exact_count += 1 + continue w = 1.0 / (d2 ** (power * 0.5)) w_sum += w wz_sum += w * z_pts[p] - if exact: - out[i, j] = exact_val + if exact_count > 0: + out[i, j] = exact_sum / exact_count elif w_sum > 0.0: out[i, j] = wz_sum / w_sum else: diff --git a/xrspatial/tests/test_interpolation.py b/xrspatial/tests/test_interpolation.py index d0d54f5e4..e7bbd66cd 100644 --- a/xrspatial/tests/test_interpolation.py +++ b/xrspatial/tests/test_interpolation.py @@ -132,6 +132,35 @@ def test_output_metadata(self): np.testing.assert_array_equal(result.coords['x'].values, template.coords['x'].values) + def test_duplicate_points_average_across_backends(self): + """Coincident input points are averaged in both all-points and + k-nearest paths. + + Regression test: the all-points (numba/cuda) kernel used to break + on the first ``d2 == 0`` match and return that point's z, while + the k-nearest (scipy.cKDTree) path averaged all coincident + matches. Both paths now average, so they agree on duplicates. + """ + x = np.array([0.0, 0.0, 1.0]) + y = np.array([0.0, 0.0, 0.0]) + z = np.array([10.0, 20.0, 30.0]) + + template = _make_template([0.0], [0.0, 0.5, 1.0]) + + # At (0, 0): two coincident inputs with z=10, z=20 -> mean=15. + # At (0.5, 0): no coincident input, normal IDW. + # At (1, 0): coincident with the third input, z=30. + all_points = idw(x, y, z, template, power=2.0).values + k_nearest = idw(x, y, z, template, power=2.0, k=3).values + + np.testing.assert_allclose(all_points[0, 0], 15.0) + np.testing.assert_allclose(k_nearest[0, 0], 15.0) + np.testing.assert_allclose(all_points[0, 2], 30.0) + np.testing.assert_allclose(k_nearest[0, 2], 30.0) + # Both paths should also match on the non-coincident grid point. + np.testing.assert_allclose( + all_points[0, 1], k_nearest[0, 1], rtol=1e-10) + @dask_array_available def test_dask_matches_numpy(self): x, y, z = _grid_points()