Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
da46c22
Keep vendor cell_id in spatial_unlabelled as a segmentation prior
EliHei2 May 18, 2026
a36fb57
Add segger method component
EliHei2 May 18, 2026
c2a3268
Apply suggestions from code review
rcannood May 18, 2026
b9c4803
Merge remote-tracking branch 'origin/main' into segger
rcannood May 18, 2026
3bc10ee
Segger - use pytorch 25.04 instead of 26.02 as a base image (#33)
rcannood May 19, 2026
5f74b6a
Fix segger CI: replace cellpose min_size=-1 with 0 for NumPy 2.x
EliHei2 May 19, 2026
1c10f12
Filter -1 / UNASSIGNED cell_id sentinels before rasterization
EliHei2 May 19, 2026
11d203c
Align segger CLI flags with dpeerlab/segger@main
EliHei2 May 19, 2026
770ca1b
Skip cudf driver validation on GPU-less runners
EliHei2 May 19, 2026
b9ae3e5
CPU stub-and-exit for segger + restore cuspatial install
EliHei2 May 19, 2026
d8765d3
Warn early when no CUDA GPU is detected for segger
EliHei2 May 19, 2026
29d16d1
Hard-fail segger when no CUDA GPU is available
EliHei2 May 19, 2026
6e0ccd7
Merge remote-tracking branch 'origin/main' into segger
rcannood May 19, 2026
bbb5f30
bypass run test for segger
rcannood May 19, 2026
c520074
add segger to wf
rcannood May 19, 2026
8c8560a
use different label
rcannood May 19, 2026
b5da903
Merge remote-tracking branch 'origin/main' into segger
EliHei2 May 19, 2026
3c2e808
Export segger's full per-cell footprint, boundaries, and per-cell table
EliHei2 May 19, 2026
48aed73
Minimal label image for segger: pixel-paint per assigned transcript
EliHei2 May 19, 2026
4b3ddc8
Set production defaults for --init_segmentation and --cellpose_diameter
EliHei2 May 19, 2026
49b7938
Switch segger raster to cKDTree-Voronoi for filled cell masks
EliHei2 May 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions src/methods/segger/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,14 @@ arguments:
- name: --init_segmentation
type: string
choices: [auto, cellpose, transcript_cell_id]
default: auto
description: |
Source of the initial boundary node set that segger trains against.
'auto' uses the transcripts' `cell_id` prior column when it is
present and runs Cellpose on `morphology_mip` otherwise.
'cellpose' always runs Cellpose. 'transcript_cell_id' requires a
`cell_id` column on the transcripts and builds one convex-hull
polygon per cell id.
info:
test_default: auto
- name: --n_epochs
type: integer
default: 20
Expand All @@ -59,9 +58,8 @@ arguments:
description: "Which polygon set drives the prediction graph."
- name: --cellpose_diameter
type: double
default: 30
description: "Cell diameter (pixels) for the Cellpose nucleus pre-segmentation."
info:
test_default: 30

resources:
- type: python_script
Expand Down
126 changes: 91 additions & 35 deletions src/methods/segger/script.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import os
import shutil
import subprocess
from collections import Counter
from pathlib import Path

import anndata as ad
Expand Down Expand Up @@ -249,32 +248,67 @@ def _run_segger(xenium_dir: Path, output_dir: Path) -> Path:
return pq


def _relabel_initial_mask(
initial_labels: np.ndarray,
tx_pixel_xy: np.ndarray,
tx_segger_cell: np.ndarray,
_PIXELS_PER_QUERY_CHUNK = 20_000_000 # cap per-chunk memory at ~160 MB of int64


def _rasterize_via_transcript_voronoi(
assigned_tx: pd.DataFrame,
image_element,
dist_cutoff_factor: float = 5.0,
min_dist_cutoff_px: float = 5.0,
) -> np.ndarray:
"""For each label in `initial_labels`, replace it by the majority segger
cell id of the transcripts that fall on top of it. Pixels whose initial
label has no covering transcripts are left at 0 (background)."""
H, W = initial_labels.shape
ys = np.clip(np.round(tx_pixel_xy[:, 1]).astype(int), 0, H - 1)
xs = np.clip(np.round(tx_pixel_xy[:, 0]).astype(int), 0, W - 1)
tx_init = initial_labels[ys, xs]
relabel = {0: 0}
for init_id in np.unique(tx_init):
if init_id == 0:
continue
seg_ids = tx_segger_cell[tx_init == init_id]
seg_ids = seg_ids[~pd.isna(seg_ids)]
if seg_ids.size == 0:
continue
most = Counter(seg_ids.astype(np.int64).tolist()).most_common(1)[0][0]
relabel[int(init_id)] = int(most)
out = np.zeros_like(initial_labels, dtype=np.int64)
for k, v in relabel.items():
out[initial_labels == k] = v
return out
"""Rasterize the segmentation by assigning every pixel to the cell of
its nearest segger-assigned transcript (transcript-Voronoi).

Gives filled cell footprints — not just isolated transcript pixels —
while remaining cheap: one cKDTree build over the assigned transcripts
plus one batched `tree.query(pixels, workers=-1)` that parallelizes
across all cores in C. Equivalent in accuracy to a per-cell Delaunay
alpha-shape for anything `process_prediction` measures (a transcript
is always closest to itself, so its pixel lookup returns its own
cell).

Pixels farther than `dist_cutoff_factor * median_nearest_neighbor_dist`
(in pixel units, lower-bounded by `min_dist_cutoff_px`) from any
assigned transcript stay 0 — cells don't bleed into empty regions.

`assigned_tx` must carry columns 'x', 'y', 'segger_cell_id'."""
from scipy.spatial import cKDTree

H, W = image_element.shape[-2:]
M_g2p = _affine_global_to_pixel(image_element)
xy_pix = _apply_affine(M_g2p, assigned_tx[["x", "y"]].to_numpy())
cell_ids = assigned_tx["segger_cell_id"].to_numpy(dtype=np.int64)

tree = cKDTree(xy_pix)

sample = min(20_000, len(xy_pix))
if sample < 2:
d_cutoff = float(min_dist_cutoff_px)
else:
rng = np.random.default_rng(0)
idx = rng.choice(len(xy_pix), sample, replace=False)
nn_d, _ = tree.query(xy_pix[idx], k=2, workers=-1)
median_nn = float(np.median(nn_d[:, 1]))
d_cutoff = max(dist_cutoff_factor * median_nn, float(min_dist_cutoff_px))
print(
f"transcript-Voronoi raster: {len(xy_pix)} transcripts, "
f"{H}x{W} grid, d_cutoff={d_cutoff:.2f}px",
flush=True,
)

labels = np.zeros((H, W), dtype=np.uint32)
rows_per_chunk = max(1, _PIXELS_PER_QUERY_CHUNK // max(W, 1))
xs_row = np.arange(W)
for y0 in range(0, H, rows_per_chunk):
y1 = min(H, y0 + rows_per_chunk)
ys = np.arange(y0, y1)
gx, gy = np.meshgrid(xs_row, ys)
pts = np.column_stack([gx.ravel(), gy.ravel()])
d, idx = tree.query(pts, k=1, workers=-1)
chunk = np.where(d > d_cutoff, 0, cell_ids[idx]).astype(np.uint32)
labels[y0:y1, :] = chunk.reshape(y1 - y0, W)
return labels


# ----------------------------- main -------------------------------- #
Expand Down Expand Up @@ -328,9 +362,8 @@ def _relabel_initial_mask(
print(f"segger emitted {seg.height} rows", flush=True)

# Keep only confident assignments. Mirror segger's canonical
# valid_cell_id_expr (null / "-1" / "UNASSIGNED" / "NONE") so that the
# -1 unassigned sentinel segger emits doesn't reach _relabel_initial_mask
# and ultimately the uint32 labels image.
# valid_cell_id_expr (null / "-1" / "UNASSIGNED" / "NONE") so the
# unassigned sentinel doesn't propagate into the label image or table.
_seg_id_str = pl.col("segger_cell_id").cast(pl.Utf8).str.to_uppercase()
seg = seg.filter(
pl.col("keep")
Expand All @@ -340,19 +373,42 @@ def _relabel_initial_mask(
& (_seg_id_str != "NONE")
)
print(f"kept assignments: {seg.height}", flush=True)

# --- Step 3: rasterize segger's per-cell footprint ---
# Build the label image as a transcript-Voronoi: every pixel is the
# cell of the nearest segger-assigned transcript, with a density-based
# distance cutoff so cells don't bleed into empty regions. Cheap (one
# cKDTree build + one batched parallel query) and gives filled cell
# masks instead of sparse transcript dots. Equivalent in accuracy to a
# per-cell Delaunay alpha-shape for what `process_prediction`'s pixel
# lookup measures.
dataset_id = sdata.tables["table"].uns["dataset_id"]

if seg.height == 0:
print("WARNING: segger kept no transcript assignments — falling back to the initial mask.", flush=True)
print(
"WARNING: segger kept no transcript assignments — falling back to "
"the initial nucleus mask.",
flush=True,
)
final_labels = initial_labels.astype(np.int64)
else:
row_idx = seg["row_index"].to_numpy()
segger_cell = seg["segger_cell_id"].to_numpy()
xy_global = tx_pd.loc[row_idx, ["x", "y"]].to_numpy()
xy_pixel = _apply_affine(_affine_global_to_pixel(image_el), xy_global)
final_labels = _relabel_initial_mask(initial_labels, xy_pixel, segger_cell)
assigned_tx = pd.DataFrame({
"x": tx_pd.loc[row_idx, "x"].to_numpy(),
"y": tx_pd.loc[row_idx, "y"].to_numpy(),
"segger_cell_id": np.asarray(segger_cell, dtype=np.int64),
})
print(
f"rasterizing {len(assigned_tx)} assigned transcripts across "
f"{assigned_tx['segger_cell_id'].nunique()} cells",
flush=True,
)
final_labels = _rasterize_via_transcript_voronoi(assigned_tx, image_el)

final_labels = _to_lower_uint(final_labels)

# --- Step 3: write the SpatialData prediction ---
# --- Step 4: write the SpatialData prediction ---
sd_out = sd.SpatialData(
labels={
"segmentation": Labels2DModel.parse(
Expand All @@ -363,7 +419,7 @@ def _relabel_initial_mask(
tables={
"table": ad.AnnData(
uns={
"dataset_id": sdata.tables["table"].uns["dataset_id"],
"dataset_id": dataset_id,
"method_id": meta["name"],
}
),
Expand Down
Loading