Skip to content

Commit 8e6848d

Browse files
Jammy2211 and co-author authored
Fix NaN gradients from jaxnnls backward pass via Jacobi preconditioning (#279)
The curvature matrix passed into `jaxnnls.solve_nnls_primal` from `reconstruction_positive_only_from` is severely ill-conditioned for typical MGE / linear-light-profile problems (cond ~ 6.7e10 on a 40x40 Q). This causes: - forward NNLS to hit its 50-iteration cap without converging, - the relaxed-KKT backward solver (custom_vjp) to diverge to NaN from a non-converged seed, - `jax.value_and_grad` to return all-NaN gradients through the whole downstream pipeline. Fix: Jacobi (diagonal) preconditioning inside the JAX branch. Rescale Q so its diagonal is unit via D = diag(Q)^{-1/2}, solve `(D Q D) y = D q` with `y >= 0`, recover `x = D y`. D is diagonal and positive so non-negativity is preserved, and the primal solution is mathematically equivalent to the raw solve. Empirically cond drops ~4 orders of magnitude (6.7e10 -> 1.1e7), forward converges in ~19 iters, relaxed converges in ~21 iters, and grad norm is finite (~6.8e4). Forward NNLS also runs ~2x faster. Gated by a new `inversion.nnls_jacobi_preconditioning` key in `autoarray/config/general.yaml`, default True. Falls back to True if the key is missing so workspace configs that shadow ours do not break. Adds two regression tests to `test_inversion_util.py` covering the ill-conditioned-gradient case and primal equivalence with the raw solve on a well-conditioned problem. Co-authored-by: Jammy2211 <JNightingale2211@gmail.com>
1 parent d74a6d3 commit 8e6848d

3 files changed

Lines changed: 95 additions & 0 deletions

File tree

autoarray/config/general.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ inversion:
66
use_edge_zeroed_pixels : true # If True, the edge pixels of a pixelization are set to zero, which prevents unphysical values in the reconstructed solution at the edge of the pixelization.
77
no_regularization_add_to_curvature_diag_value : 1.0e-3 # The default value added to the curvature matrix's diagonal when regularization is not applied to a linear object, which prevents inversion's failing due to the matrix being singular.
88
use_border_relocator: false # If True, by default a pixelization's border is used to relocate all pixels outside its border to the border.
9+
nnls_jacobi_preconditioning: true # If True (default), the curvature matrix passed to jaxnnls.solve_nnls_primal is Jacobi-preconditioned (D Q D y = D q, x = D y). Fixes NaN backward-pass gradients on ill-conditioned Q and roughly halves forward solve time. Set False to restore the raw unpreconditioned solve.
910
reconstruction_vmax_factor: 0.5 # Plots of an Inversion's reconstruction use the reconstructed data's bright value multiplied by this factor.
1011
numba:
1112
use_numba: true

autoarray/inversion/inversion/inversion_util.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,29 @@ def reconstruction_positive_only_from(
275275
if xp.__name__.startswith("jax"):
276276

277277
import jaxnnls
278+
from autoconf import conf
279+
280+
try:
281+
use_jacobi = conf.instance["general"]["inversion"][
282+
"nnls_jacobi_preconditioning"
283+
]
284+
except KeyError:
285+
# Workspaces ship their own general.yaml that shadows autoarray's;
286+
# default to True so gradients remain well-defined unless the user
287+
# explicitly disables preconditioning in the shadowing config.
288+
use_jacobi = True
289+
290+
if use_jacobi:
291+
# Ill-conditioned Q makes jaxnnls's relaxed-KKT backward pass
292+
# produce NaN gradients. Rescale Q so its diagonal is unit:
293+
# solve (D Q D) y = D q with y >= 0, recover x = D y. D is
294+
# diagonal positive, so non-negativity is preserved and the
295+
# primal solution is mathematically equivalent.
296+
d = xp.sqrt(xp.diag(curvature_reg_matrix))
297+
D = 1.0 / d
298+
Q_pc = (curvature_reg_matrix * D[:, None]) * D[None, :]
299+
q_pc = data_vector * D
300+
return jaxnnls.solve_nnls_primal(Q_pc, q_pc) * D
278301

279302
return jaxnnls.solve_nnls_primal(curvature_reg_matrix, data_vector)
280303

test_autoarray/inversion/inversion/test_inversion_util.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,3 +228,74 @@ def test__preconditioner_matrix_via_mapping_matrix_from():
228228
preconditioner_matrix
229229
== np.array([[5.0, 2.0, 3.0], [4.0, 9.0, 6.0], [7.0, 8.0, 13.0]])
230230
).all()
231+
232+
233+
def test__reconstruction_positive_only_from__jax_ill_conditioned_grad_is_finite():
    """
    Regression test: on ill-conditioned curvature matrices the jaxnnls
    backward pass used to return NaN gradients because the relaxed-KKT solver
    diverged. Jacobi preconditioning inside
    `reconstruction_positive_only_from` re-parameterises the NNLS problem so
    the solve converges and `jax.value_and_grad` yields finite gradients.
    Skipped when jax / jaxnnls are not installed.
    """
    jax = pytest.importorskip("jax")
    import jax.numpy as jnp

    pytest.importorskip("jaxnnls")

    # Deliberately ill-conditioned symmetric positive-definite matrix:
    # eigenvalues span seven decades (cond ~ 1e7), enough to break the raw
    # jaxnnls backward pass without preconditioning.
    generator = np.random.default_rng(0)
    size = 10
    basis, _ = np.linalg.qr(generator.standard_normal((size, size)))
    spectrum = np.logspace(-4, 3, size)
    curvature_np = (basis * spectrum) @ basis.T
    curvature_np = 0.5 * (curvature_np + curvature_np.T)  # symmetrise round-off
    vector_np = generator.standard_normal(size)

    curvature = jnp.array(curvature_np)
    vector = jnp.array(vector_np)

    def loss(q_in):
        # Differentiate a scalar summary of the NNLS solution w.r.t. the
        # data vector, exercising the custom_vjp backward pass.
        x = aa.util.inversion.reconstruction_positive_only_from(
            data_vector=q_in, curvature_reg_matrix=curvature, xp=jnp,
        )
        return jnp.sum(x)

    value, grad = jax.value_and_grad(loss)(vector)

    assert np.isfinite(float(value))
    grad_np = np.array(grad)
    assert np.all(np.isfinite(grad_np)), (
        f"gradient has {np.sum(~np.isfinite(grad_np))} non-finite entries"
    )
271+
272+
273+
def test__reconstruction_positive_only_from__jax_matches_unpreconditioned_primal():
    """
    Jacobi preconditioning is a change of coordinates; the forward primal
    solution must match the raw jaxnnls solve to within solver tolerance for
    a moderately-conditioned problem where the raw solver also converges.
    Skipped when jax / jaxnnls are not installed.
    """
    # Skip-only check: the bound name was previously assigned but never used.
    pytest.importorskip("jax")
    import jax.numpy as jnp

    jaxnnls = pytest.importorskip("jaxnnls")

    # Well-conditioned SPD matrix: eigenvalues in [0.5, 5.0], cond = 10,
    # so both the raw and preconditioned solves converge and must agree.
    rng = np.random.default_rng(1)
    n = 8
    U, _ = np.linalg.qr(rng.standard_normal((n, n)))
    eigs = np.linspace(0.5, 5.0, n)  # well-conditioned
    Q_np = (U * eigs) @ U.T
    Q_np = 0.5 * (Q_np + Q_np.T)  # symmetrise against round-off
    q_np = rng.standard_normal(n)

    Q = jnp.array(Q_np)
    q = jnp.array(q_np)

    # Raw (unpreconditioned) reference solve versus the library entry point,
    # which applies Jacobi preconditioning when the config key is enabled.
    x_raw = np.array(jaxnnls.solve_nnls_primal(Q, q))
    x_pc = np.array(
        aa.util.inversion.reconstruction_positive_only_from(
            data_vector=q, curvature_reg_matrix=Q, xp=jnp,
        )
    )

    np.testing.assert_allclose(x_pc, x_raw, rtol=1e-6, atol=1e-8)

0 commit comments

Comments (0)