From da7de9535b9096891939e3bddfff05d6601afc4c Mon Sep 17 00:00:00 2001
From: xuewei cao <36172337+xueweic@users.noreply.github.com>
Date: Fri, 29 May 2026 22:49:43 -0500
Subject: [PATCH 01/14] Add direct SuSiE fitting option and unified top-loci
 export

This update adds two fine-mapping output improvements while preserving existing default behavior.

First, univariate fine-mapping can now bypass SuSiE-inf initialization when ordinary SuSiE fitting is explicitly requested. The SuSiE-inf path remains the default for compatibility, and TWAS-weight generation continues to require the existing SuSiE-inf workflow.

Second, SuSiE wrapper output now includes a unified top-loci export table alongside the existing wrapper-facing summaries. The export keeps posterior effect summaries separate from marginal association fields, preserves credible-set membership including overlapping credible sets, and leaves existing top_loci and top_loci_long contracts unchanged.

Focused regression tests cover the direct SuSiE path and the unified top-loci export contract.
---
 R/susie_wrapper.R                         | 195 +++++++++++++++++++++-
 R/univariate_pipeline.R                   |  49 ++++--
 tests/testthat/test_susie_wrapper.R       |  97 +++++++++++
 tests/testthat/test_univariate_pipeline.R |  39 +++++
 4 files changed, 365 insertions(+), 15 deletions(-)

diff --git a/R/susie_wrapper.R b/R/susie_wrapper.R
index bb1e2a8f..d1ecdf53 100644
--- a/R/susie_wrapper.R
+++ b/R/susie_wrapper.R
@@ -267,6 +267,8 @@ postprocess_finemapping_fit.susiF <- function(fit, method = "fsusie", ...) {
   )
 
   trimmed <- trim_finemapping_fit(fit, effect_idx, method, cs_tables)
+  trimmed$X_scalar <- X_scalar
+  trimmed$y_scalar <- y_scalar
 
   # Build FineMappingResult S4 object
   fm_result <- FineMappingResult(
@@ -491,6 +493,186 @@ build_top_loci_wide <- function(top_loci_long, posts) {
   out
 }
 
+build_top_loci_export <- function(top_loci, top_loci_long = NULL,
+                                  primary_method = "susie",
+                                  result_trimmed = NULL,
+                                  variant_names = NULL,
+                                  gene_id = NA_character_,
+                                  event_id = NA_character_,
+                                  n = NA_real_, af = NULL) {
+  empty <- function() {
+    data.frame(
+      "#chr" = integer(), start = integer(), end = integer(),
+      a1 = character(), a2 = character(), variant_ID = character(),
+      gene_ID = character(), event_ID = character(),
+      "cs_coverage_0.95" = integer(), "cs_coverage_0.7" = integer(),
+      "cs_coverage_0.5" = integer(), PIP = numeric(),
+      conditional_effect = numeric(), conditional_effect_se = numeric(),
+      beta = numeric(), se = numeric(), n = numeric(), af = numeric(),
+      stringsAsFactors = FALSE, check.names = FALSE
+    )
+  }
+  if (is.null(top_loci) || !is.data.frame(top_loci) || nrow(top_loci) == 0) {
+    return(empty())
+  }
+
+  top_loci <- .translate_legacy_top_loci_cs_columns(top_loci)
+  variant_col <- intersect(c("variant_ID", "variant_id"), names(top_loci))
+  if (length(variant_col) == 0) stop("top_loci is missing variant_id or variant_ID.")
+  variant_id <- as.character(top_loci[[variant_col[[1]]]])
+  n_rows <- length(variant_id)
+
+  match_numeric <- function(value) {
+    out <- rep(NA_real_, n_rows)
+    if (is.null(value) || length(value) == 0) return(out)
+    value_names <- names(value)
+    value <- suppressWarnings(as.numeric(value))
+    if (!is.null(value_names) && length(value_names) == length(value)) {
+      names(value) <- value_names
+    }
+    if (!is.null(names(value))) {
+      query <- tryCatch(normalize_variant_id(variant_id), error = function(e) variant_id)
+      names(value) <- tryCatch(normalize_variant_id(names(value)), error = function(e) names(value))
+      return(as.numeric(value[query]))
+    }
+    if (length(value) == 1) return(rep(value, n_rows))
+    if (length(value) == n_rows) return(value)
+    out
+  }
+  column_numeric <- function(columns) {
+    hit <- intersect(columns, names(top_loci))
+    if (length(hit) == 0) return(rep(NA_real_, n_rows))
+    suppressWarnings(as.numeric(top_loci[[hit[[1]]]]))
+  }
+  cs_column <- function(coverage) {
+    hit <- intersect(format_cs_column(coverage, primary_method), names(top_loci))
+    if (length(hit) > 0) return(as.integer(top_loci[[hit[[1]]]]))
+    if (any(grepl("^CS_[0-9.]+_[A-Za-z0-9_.-]+$", names(top_loci)))) {
+      return(rep(0L, n_rows))
+    }
+    rep(NA_integer_, n_rows)
+  }
+  scale_effect <- function(value) {
+    if (is.null(value) || is.null(result_trimmed)) return(value)
+    y_scalar <- suppressWarnings(as.numeric(result_trimmed$y_scalar %||% 1))
+    if (length(y_scalar) != 1 || !is.finite(y_scalar)) y_scalar <- 1
+    x_scalar <- result_trimmed$X_scalar
+    if (is.null(x_scalar)) return(value * y_scalar)
+    if (length(x_scalar) == 1) {
+      x_scalar <- rep(as.numeric(x_scalar), length(value))
+    } else if (is.null(names(x_scalar)) && !is.null(names(value)) &&
+               length(x_scalar) == length(value)) {
+      names(x_scalar) <- names(value)
+    }
+    if (!is.null(names(value)) && !is.null(names(x_scalar))) {
+      query <- tryCatch(normalize_variant_id(names(value)), error = function(e) names(value))
+      names(x_scalar) <- tryCatch(normalize_variant_id(names(x_scalar)),
+                                  error = function(e) names(x_scalar))
+      x_scalar <- as.numeric(x_scalar[query])
+    } else if (length(x_scalar) != length(value)) {
+      x_scalar <- rep(NA_real_, length(value))
+    }
+    ok <- is.finite(value) & is.finite(x_scalar) & x_scalar != 0
+    value[ok] <- value[ok] * y_scalar / x_scalar[ok]
+    value
+  }
+
+  parsed <- tryCatch(
+    suppressWarnings(parse_variant_id(variant_id)),
+    error = function(e) NULL
+  )
+  if (is.null(parsed) || nrow(parsed) != n_rows) {
+    parsed <- data.frame(
+      chrom = rep(NA_integer_, n_rows), pos = rep(NA_integer_, n_rows),
+      A2 = rep(NA_character_, n_rows), A1 = rep(NA_character_, n_rows),
+      stringsAsFactors = FALSE
+    )
+  }
+
+  pip_col <- resolve_pip_column(top_loci, primary_method)
+  effect <- effect_se <- rep(NA_real_, n_rows)
+  if (!is.null(result_trimmed)) {
+    names_from_fit <- variant_names %||% names(result_trimmed$pip) %||%
+      colnames(result_trimmed$alpha)
+    effect <- tryCatch(susieR::coef.susie(result_trimmed), error = function(e) NULL)
+    effect_se <- tryCatch(susieR::susie_get_posterior_sd(result_trimmed),
+                          error = function(e) NULL)
+    if (!is.null(effect)) {
+      if (!is.null(names_from_fit) && length(names_from_fit) == length(effect)) {
+        names(effect) <- names_from_fit
+      }
+      effect <- match_numeric(scale_effect(effect))
+    } else {
+      effect <- rep(NA_real_, n_rows)
+    }
+    if (!is.null(effect_se)) {
+      if (!is.null(names_from_fit) && length(names_from_fit) == length(effect_se)) {
+        names(effect_se) <- names_from_fit
+      }
+      effect_se <- match_numeric(abs(scale_effect(effect_se)))
+    } else {
+      effect_se <- rep(NA_real_, n_rows)
+    }
+  }
+
+  out <- data.frame(
+    "#chr" = parsed$chrom,
+    start = parsed$pos - 1L,
+    end = parsed$pos,
+    a1 = parsed$A1,
+    a2 = parsed$A2,
+    variant_ID = variant_id,
+    gene_ID = rep(gene_id, length.out = n_rows),
+    event_ID = rep(event_id, length.out = n_rows),
+    "cs_coverage_0.95" = cs_column(0.95),
+    "cs_coverage_0.7" = cs_column(0.7),
+    "cs_coverage_0.5" = cs_column(0.5),
+    PIP = if (is.null(pip_col)) rep(NA_real_, n_rows) else as.numeric(top_loci[[pip_col]]),
+    conditional_effect = effect,
+    conditional_effect_se = effect_se,
+    beta = column_numeric(c("beta", "betahat")),
+    se = column_numeric(c("se", "sebetahat")),
+    n = if (any(!is.na(column_numeric("n")))) column_numeric("n") else match_numeric(n),
+    af = if (any(!is.na(column_numeric("af")))) column_numeric("af") else match_numeric(af),
+    stringsAsFactors = FALSE,
+    check.names = FALSE
+  )
+
+  if (is.null(top_loci_long) || !is.data.frame(top_loci_long) ||
+      nrow(top_loci_long) == 0 || !all(c("variant_id", "method", "coverage", "cs") %in%
+                                       names(top_loci_long))) {
+    return(out)
+  }
+  coverage_columns <- c("0.95" = "cs_coverage_0.95",
+                        "0.7" = "cs_coverage_0.7",
+                        "0.5" = "cs_coverage_0.5")
+  rows <- lapply(seq_len(nrow(out)), function(i) {
+    row <- out[i, , drop = FALSE]
+    memberships <- lapply(names(coverage_columns), function(coverage) {
+      hits <- top_loci_long[
+        as.character(top_loci_long$variant_id) == row$variant_ID[[1]] &
+          as.character(top_loci_long$method) == primary_method &
+          abs(as.numeric(top_loci_long$coverage) - as.numeric(coverage)) < 1e-8,
+        , drop = FALSE
+      ]
+      unique(as.integer(hits$cs[!is.na(hits$cs)]))
+    })
+    names(memberships) <- coverage_columns
+    n_membership <- max(1L, vapply(memberships, length, integer(1)))
+    do.call(rbind, lapply(seq_len(n_membership), function(j) {
+      expanded <- row
+      for (column in names(memberships)) {
+        values <- memberships[[column]]
+        if (length(values) > 0) expanded[[column]] <- values[[min(j, length(values))]]
+      }
+      expanded
+    }))
+  })
+  out <- do.call(rbind, rows)
+  rownames(out) <- NULL
+  out
+}
+
 trim_finemapping_fit <- function(fit, effect_idx, method, cs_tables) {
   alpha <- .as_effect_matrix(fit$alpha)
   lbf_variable <- .as_lbf_matrix(fit)
@@ -546,20 +728,29 @@ trim_finemapping_fit <- function(fit, effect_idx, method, cs_tables) {
 #' @param post Output from \code{\link{postprocess_finemapping_fits}}.
 #' @param primary_method Method whose result should populate root-level fields.
 #' @return A list with root-level fields including \code{variant_names},
-#'   \code{susie_result_trimmed}, \code{top_loci_long}, and \code{top_loci}.
+#'   \code{susie_result_trimmed}, \code{top_loci_long}, \code{top_loci},
+#'   and the exported table \code{top_loci_export}.
 #' @export
 format_finemapping_output <- function(post, primary_method) {
   method_post <- post$finemapping_results[[primary_method]]
   if (is.null(method_post)) {
     stop("primary_method was not found in finemapping_results: ", primary_method)
   }
+  top_loci_export <- build_top_loci_export(
+    top_loci = post$top_loci,
+    top_loci_long = post$top_loci_long,
+    primary_method = primary_method,
+    result_trimmed = method_post$result_trimmed,
+    variant_names = method_post$variant_names
+  )
   keep_names <- setdiff(names(method_post), c("result_trimmed", "top_loci_long"))
   c(
     method_post[keep_names],
     list(
       susie_result_trimmed = method_post$result_trimmed,
       top_loci_long = post$top_loci_long,
-      top_loci = post$top_loci
+      top_loci = post$top_loci,
+      top_loci_export = top_loci_export
     )
   )
 }
diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R
index 6ec7cf71..3339cafc 100644
--- a/R/univariate_pipeline.R
+++ b/R/univariate_pipeline.R
@@ -1,8 +1,8 @@
 #' Univariate Analysis Pipeline
 #'
 #' This function performs univariate analysis for fine-mapping and Transcriptome-Wide Association Study (TWAS)
-#' with optional cross-validation. Fine-mapping fits SuSiE-inf first and then
-#' fits SuSiE initialized from the SuSiE-inf result.
+#' with optional cross-validation. By default, fine-mapping fits SuSiE-inf first
+#' and then fits SuSiE initialized from the SuSiE-inf result.
 #'
 #' @param X A matrix of genotype data where rows represent samples and columns represent genetic variants.
 #' @param Y A vector of phenotype measurements.
@@ -25,6 +25,9 @@
 #' @param finemapping_extra_opts Additional options passed to \code{susieR::susie()}.
 #'   SuSiE-inf is always fitted with \code{refine = FALSE}; the ordinary SuSiE
 #'   fit keeps these options and is initialized with \code{model_init}.
+#' @param fit_susie_inf Whether to fit SuSiE-inf before ordinary SuSiE. Default
+#'   is TRUE for existing pipeline compatibility. If FALSE, ordinary SuSiE is
+#'   fitted directly and SuSiE-inf fitted objects/results are not returned.
 #' @param twas_weights Whether to compute TWAS weights. Default is TRUE.
 #' @param sample_partition Optional data frame with Sample and Fold columns for cross-validation. Default is NULL.
 #' @param max_cv_variants The maximum number of variants to be included in cross-validation. Default is -1 (no limit).
@@ -58,6 +61,7 @@ univariate_analysis_pipeline <- function(
     coverage = c(0.95, 0.7, 0.5),
     min_abs_corr = 0.8,
     finemapping_extra_opts = list(refine = TRUE),
+    fit_susie_inf = TRUE,
     # TWAS weights and CV for TWAS weights
     twas_weights = TRUE,
     sample_partition = NULL,
@@ -75,6 +79,12 @@ univariate_analysis_pipeline <- function(
   if (!is.numeric(Y_scalar) || length(Y_scalar) != 1) stop("Y_scalar must be a numeric scalar")
   if (!is.numeric(L) || L <= 0) stop("L must be a positive integer")
   if (!is.null(L_greedy) && (!is.numeric(L_greedy) || L_greedy <= 0)) stop("L_greedy must be NULL or a positive integer")
+  if (!is.logical(fit_susie_inf) || length(fit_susie_inf) != 1 || is.na(fit_susie_inf)) {
+    stop("fit_susie_inf must be TRUE or FALSE")
+  }
+  if (!isTRUE(fit_susie_inf) && isTRUE(twas_weights)) {
+    stop("fit_susie_inf = FALSE is not compatible with twas_weights = TRUE")
+  }
 
   # Initial PIP check
   if (pip_cutoff_to_skip != 0) {
@@ -112,17 +122,28 @@ univariate_analysis_pipeline <- function(
   st <- proc.time()
   res <- list()
 
-  message("Fitting SuSiE-inf model on input data ...")
-  message("Fitting SuSiE model initialized by SuSiE-inf ...")
-  fitted_models <- fit_susie_inf_then_susie(
-    X,
-    Y,
-    args = modifyList(
-      finemapping_extra_opts,
-      list(L = L, L_greedy = L_greedy, coverage = coverage[1])
-    )
+  susie_args <- modifyList(
+    finemapping_extra_opts,
+    list(L = L, L_greedy = L_greedy, coverage = coverage[1])
   )
-  res$susie_inf_fitted <- fitted_models[["susie_inf"]]
+  if (isTRUE(fit_susie_inf)) {
+    message("Fitting SuSiE-inf model on input data ...")
+    message("Fitting SuSiE model initialized by SuSiE-inf ...")
+    fitted_models <- fit_susie_inf_then_susie(
+      X,
+      Y,
+      args = susie_args
+    )
+    res$susie_inf_fitted <- fitted_models[["susie_inf"]]
+  } else {
+    message("Fitting SuSiE model on input data ...")
+    fitted_models <- list(
+      susie = .set_finemapping_fit_class(
+        do.call(susie, c(list(X = X, y = Y), susie_args)),
+        "susie"
+      )
+    )
+  }
   res$susie_fitted <- fitted_models[["susie"]]
 
   # Process SuSiE results
@@ -140,7 +161,9 @@ univariate_analysis_pipeline <- function(
     other_quantities = other_quantities
   )
   res <- c(res, format_finemapping_output(susie_post, primary_method = "susie"))
-  res$susie_inf_result_trimmed <- susie_post$finemapping_results$susie_inf$result_trimmed
+  if (!is.null(susie_post$finemapping_results$susie_inf)) {
+    res$susie_inf_result_trimmed <- susie_post$finemapping_results$susie_inf$result_trimmed
+  }
   res$total_time_elapsed <- proc.time() - st
 
   # TWAS weights and cross-validation
diff --git a/tests/testthat/test_susie_wrapper.R b/tests/testthat/test_susie_wrapper.R
index 20747008..a671c3a8 100644
--- a/tests/testthat/test_susie_wrapper.R
+++ b/tests/testthat/test_susie_wrapper.R
@@ -765,6 +765,103 @@ test_that("format_finemapping_output does not duplicate top loci variants", {
   expect_equal(unique(out$top_loci$variant_id), paste0("v", 1:4))
 })
 
+test_that("format_finemapping_output adds top_loci_export without changing wrapper tables", {
+  variant_id <- "chr1:101:A:G"
+  result_trimmed <- list(
+    pip = setNames(0.8, variant_id),
+    alpha = matrix(1, nrow = 1, ncol = 1,
+                   dimnames = list("L1", variant_id)),
+    mu = matrix(0.8, nrow = 1, ncol = 1,
+                dimnames = list("L1", variant_id)),
+    mu2 = matrix(0.81, nrow = 1, ncol = 1,
+                 dimnames = list("L1", variant_id)),
+    V = 1,
+    X_column_scale_factors = 1,
+    X_scalar = 1,
+    y_scalar = 1
+  )
+  class(result_trimmed) <- "susie"
+  top_loci <- data.frame(
+    variant_id = variant_id,
+    betahat = 0.2,
+    sebetahat = 0.1,
+    pip_susie = 0.8,
+    CS_95_susie = 1L,
+    CS_70_susie = 0L,
+    CS_50_susie = 0L,
+    stringsAsFactors = FALSE
+  )
+  post <- list(
+    finemapping_results = list(susie = list(
+      variant_names = variant_id,
+      result_trimmed = result_trimmed,
+      top_loci_long = NULL
+    )),
+    top_loci_long = NULL,
+    top_loci = top_loci
+  )
+
+  out <- format_finemapping_output(post, "susie")
+
+  expect_true("top_loci_export" %in% names(out))
+  expect_equal(out$top_loci$betahat, 0.2)
+  expect_equal(out$top_loci_export$conditional_effect, 0.8)
+  expect_equal(out$top_loci_export$conditional_effect_se, sqrt(0.17),
+               tolerance = 1e-8)
+  expect_equal(out$top_loci_export$beta, 0.2)
+  expect_equal(out$top_loci_export$se, 0.1)
+})
+
+test_that("build_top_loci_export expands overlapping CS membership and carries n and af", {
+  variant_id <- "chr1:101:A:G"
+  result_trimmed <- list(
+    pip = setNames(0.8, variant_id),
+    alpha = matrix(1, nrow = 1, ncol = 1,
+                   dimnames = list("L1", variant_id)),
+    mu = matrix(0.8, nrow = 1, ncol = 1,
+                dimnames = list("L1", variant_id)),
+    mu2 = matrix(0.81, nrow = 1, ncol = 1,
+                 dimnames = list("L1", variant_id)),
+    V = 1,
+    X_column_scale_factors = 1
+  )
+  top_loci <- data.frame(
+    variant_id = variant_id,
+    betahat = 0.2,
+    sebetahat = 0.1,
+    pip_susie = 0.8,
+    CS_95_susie = 1L,
+    stringsAsFactors = FALSE
+  )
+  top_loci_long <- data.frame(
+    variant_id = c(variant_id, variant_id),
+    method = c("susie", "susie"),
+    coverage = c(0.95, 0.95),
+    cs = c(1L, 2L),
+    stringsAsFactors = FALSE
+  )
+  af <- setNames(0.35, variant_id)
+
+  out <- pecotmr:::build_top_loci_export(
+    top_loci = top_loci,
+    top_loci_long = top_loci_long,
+    primary_method = "susie",
+    result_trimmed = result_trimmed,
+    variant_names = variant_id,
+    gene_id = "GENE1",
+    event_id = "EVENT1",
+    n = 42,
+    af = af
+  )
+
+  expect_equal(nrow(out), 2L)
+  expect_equal(out[["cs_coverage_0.95"]], c(1L, 2L))
+  expect_equal(out$gene_ID, c("GENE1", "GENE1"))
+  expect_equal(out$event_ID, c("EVENT1", "EVENT1"))
+  expect_equal(out$n, c(42, 42))
+  expect_equal(out$af, c(0.35, 0.35))
+})
+
 .make_univariate_data <- function(seed = 42, n = 300, p = 50,
                                   effect_idx = integer(0), effect_size = NULL) {
   set.seed(seed)
diff --git a/tests/testthat/test_univariate_pipeline.R b/tests/testthat/test_univariate_pipeline.R
index 762a22d3..a8dacb99 100644
--- a/tests/testthat/test_univariate_pipeline.R
+++ b/tests/testthat/test_univariate_pipeline.R
@@ -614,6 +614,45 @@ test_that("uap: susie-inf initializes susie with matching greedy L", {
   expect_equal(unclass(result$susie_inf_fitted), fake_inf_fit)
 })
 
+test_that("uap: ordinary susie can run without susie-inf initialization", {
+  inp <- make_uap_inputs()
+  fake_fit <- make_fake_susie_fit(inp$p)
+  fake_post <- make_fake_post_result(inp$p)
+
+  captured_calls <- list()
+  local_mocked_bindings(
+    susie = function(X, y, L, L_greedy, coverage, unmappable_effects = "none",
+                     model_init = NULL, ...) {
+      captured_calls[[length(captured_calls) + 1]] <<- list(
+        L = L,
+        L_greedy = L_greedy,
+        unmappable_effects = unmappable_effects,
+        model_init = model_init
+      )
+      fake_fit
+    },
+    postprocess_finemapping_fits = function(fits, ...) {
+      expect_named(fits, "susie")
+      fake_post
+    },
+    format_finemapping_output = function(post, primary_method) post,
+  )
+
+  result <- univariate_analysis_pipeline(
+    X = inp$X, Y = inp$Y, maf = inp$maf,
+    twas_weights = FALSE, fit_susie_inf = FALSE,
+    L = 15, L_greedy = 7
+  )
+  expect_length(captured_calls, 1)
+  expect_equal(captured_calls[[1]]$L, 15)
+  expect_equal(captured_calls[[1]]$L_greedy, 7)
+  expect_equal(captured_calls[[1]]$unmappable_effects, "none")
+  expect_null(captured_calls[[1]]$model_init)
+  expect_s3_class(result$susie_fitted, "susie")
+  expect_false("susie_inf_fitted" %in% names(result))
+  expect_false("susie_inf_result_trimmed" %in% names(result))
+})
+
 test_that("uap: post-processing output is merged into result", {
   inp <- make_uap_inputs()
   fake_fit <- make_fake_susie_fit(inp$p)

From a2c5402efe22b2fc2ae56e56e9b12847f1939b81 Mon Sep 17 00:00:00 2001
From: xuewei cao <36172337+xueweic@users.noreply.github.com>
Date: Sat, 30 May 2026 00:48:56 -0500
Subject: [PATCH 02/14] remove unified top loci table changes

---
 R/susie_wrapper.R                   | 195 +---------------------------
 tests/testthat/test_susie_wrapper.R |  97 --------------
 2 files changed, 2 insertions(+), 290 deletions(-)

diff --git a/R/susie_wrapper.R b/R/susie_wrapper.R
index d1ecdf53..bb1e2a8f 100644
--- a/R/susie_wrapper.R
+++ b/R/susie_wrapper.R
@@ -267,8 +267,6 @@ postprocess_finemapping_fit.susiF <- function(fit, method = "fsusie", ...) {
   )
 
   trimmed <- trim_finemapping_fit(fit, effect_idx, method, cs_tables)
-  trimmed$X_scalar <- X_scalar
-  trimmed$y_scalar <- y_scalar
 
   # Build FineMappingResult S4 object
   fm_result <- FineMappingResult(
@@ -493,186 +491,6 @@ build_top_loci_wide <- function(top_loci_long, posts) {
   out
 }
 
-build_top_loci_export <- function(top_loci, top_loci_long = NULL,
-                                  primary_method = "susie",
-                                  result_trimmed = NULL,
-                                  variant_names = NULL,
-                                  gene_id = NA_character_,
-                                  event_id = NA_character_,
-                                  n = NA_real_, af = NULL) {
-  empty <- function() {
-    data.frame(
-      "#chr" = integer(), start = integer(), end = integer(),
-      a1 = character(), a2 = character(), variant_ID = character(),
-      gene_ID = character(), event_ID = character(),
-      "cs_coverage_0.95" = integer(), "cs_coverage_0.7" = integer(),
-      "cs_coverage_0.5" = integer(), PIP = numeric(),
-      conditional_effect = numeric(), conditional_effect_se = numeric(),
-      beta = numeric(), se = numeric(), n = numeric(), af = numeric(),
-      stringsAsFactors = FALSE, check.names = FALSE
-    )
-  }
-  if (is.null(top_loci) || !is.data.frame(top_loci) || nrow(top_loci) == 0) {
-    return(empty())
-  }
-
-  top_loci <- .translate_legacy_top_loci_cs_columns(top_loci)
-  variant_col <- intersect(c("variant_ID", "variant_id"), names(top_loci))
-  if (length(variant_col) == 0) stop("top_loci is missing variant_id or variant_ID.")
-  variant_id <- as.character(top_loci[[variant_col[[1]]]])
-  n_rows <- length(variant_id)
-
-  match_numeric <- function(value) {
-    out <- rep(NA_real_, n_rows)
-    if (is.null(value) || length(value) == 0) return(out)
-    value_names <- names(value)
-    value <- suppressWarnings(as.numeric(value))
-    if (!is.null(value_names) && length(value_names) == length(value)) {
-      names(value) <- value_names
-    }
-    if (!is.null(names(value))) {
-      query <- tryCatch(normalize_variant_id(variant_id), error = function(e) variant_id)
-      names(value) <- tryCatch(normalize_variant_id(names(value)), error = function(e) names(value))
-      return(as.numeric(value[query]))
-    }
-    if (length(value) == 1) return(rep(value, n_rows))
-    if (length(value) == n_rows) return(value)
-    out
-  }
-  column_numeric <- function(columns) {
-    hit <- intersect(columns, names(top_loci))
-    if (length(hit) == 0) return(rep(NA_real_, n_rows))
-    suppressWarnings(as.numeric(top_loci[[hit[[1]]]]))
-  }
-  cs_column <- function(coverage) {
-    hit <- intersect(format_cs_column(coverage, primary_method), names(top_loci))
-    if (length(hit) > 0) return(as.integer(top_loci[[hit[[1]]]]))
-    if (any(grepl("^CS_[0-9.]+_[A-Za-z0-9_.-]+$", names(top_loci)))) {
-      return(rep(0L, n_rows))
-    }
-    rep(NA_integer_, n_rows)
-  }
-  scale_effect <- function(value) {
-    if (is.null(value) || is.null(result_trimmed)) return(value)
-    y_scalar <- suppressWarnings(as.numeric(result_trimmed$y_scalar %||% 1))
-    if (length(y_scalar) != 1 || !is.finite(y_scalar)) y_scalar <- 1
-    x_scalar <- result_trimmed$X_scalar
-    if (is.null(x_scalar)) return(value * y_scalar)
-    if (length(x_scalar) == 1) {
-      x_scalar <- rep(as.numeric(x_scalar), length(value))
-    } else if (is.null(names(x_scalar)) && !is.null(names(value)) &&
-               length(x_scalar) == length(value)) {
-      names(x_scalar) <- names(value)
-    }
-    if (!is.null(names(value)) && !is.null(names(x_scalar))) {
-      query <- tryCatch(normalize_variant_id(names(value)), error = function(e) names(value))
-      names(x_scalar) <- tryCatch(normalize_variant_id(names(x_scalar)),
-                                  error = function(e) names(x_scalar))
-      x_scalar <- as.numeric(x_scalar[query])
-    } else if (length(x_scalar) != length(value)) {
-      x_scalar <- rep(NA_real_, length(value))
-    }
-    ok <- is.finite(value) & is.finite(x_scalar) & x_scalar != 0
-    value[ok] <- value[ok] * y_scalar / x_scalar[ok]
-    value
-  }
-
-  parsed <- tryCatch(
-    suppressWarnings(parse_variant_id(variant_id)),
-    error = function(e) NULL
-  )
-  if (is.null(parsed) || nrow(parsed) != n_rows) {
-    parsed <- data.frame(
-      chrom = rep(NA_integer_, n_rows), pos = rep(NA_integer_, n_rows),
-      A2 = rep(NA_character_, n_rows), A1 = rep(NA_character_, n_rows),
-      stringsAsFactors = FALSE
-    )
-  }
-
-  pip_col <- resolve_pip_column(top_loci, primary_method)
-  effect <- effect_se <- rep(NA_real_, n_rows)
-  if (!is.null(result_trimmed)) {
-    names_from_fit <- variant_names %||% names(result_trimmed$pip) %||%
-      colnames(result_trimmed$alpha)
-    effect <- tryCatch(susieR::coef.susie(result_trimmed), error = function(e) NULL)
-    effect_se <- tryCatch(susieR::susie_get_posterior_sd(result_trimmed),
-                          error = function(e) NULL)
-    if (!is.null(effect)) {
-      if (!is.null(names_from_fit) && length(names_from_fit) == length(effect)) {
-        names(effect) <- names_from_fit
-      }
-      effect <- match_numeric(scale_effect(effect))
-    } else {
-      effect <- rep(NA_real_, n_rows)
-    }
-    if (!is.null(effect_se)) {
-      if (!is.null(names_from_fit) && length(names_from_fit) == length(effect_se)) {
-        names(effect_se) <- names_from_fit
-      }
-      effect_se <- match_numeric(abs(scale_effect(effect_se)))
-    } else {
-      effect_se <- rep(NA_real_, n_rows)
-    }
-  }
-
-  out <- data.frame(
-    "#chr" = parsed$chrom,
-    start = parsed$pos - 1L,
-    end = parsed$pos,
-    a1 = parsed$A1,
-    a2 = parsed$A2,
-    variant_ID = variant_id,
-    gene_ID = rep(gene_id, length.out = n_rows),
-    event_ID = rep(event_id, length.out = n_rows),
-    "cs_coverage_0.95" = cs_column(0.95),
-    "cs_coverage_0.7" = cs_column(0.7),
-    "cs_coverage_0.5" = cs_column(0.5),
-    PIP = if (is.null(pip_col)) rep(NA_real_, n_rows) else as.numeric(top_loci[[pip_col]]),
-    conditional_effect = effect,
-    conditional_effect_se = effect_se,
-    beta = column_numeric(c("beta", "betahat")),
-    se = column_numeric(c("se", "sebetahat")),
-    n = if (any(!is.na(column_numeric("n")))) column_numeric("n") else match_numeric(n),
-    af = if (any(!is.na(column_numeric("af")))) column_numeric("af") else match_numeric(af),
-    stringsAsFactors = FALSE,
-    check.names = FALSE
-  )
-
-  if (is.null(top_loci_long) || !is.data.frame(top_loci_long) ||
-      nrow(top_loci_long) == 0 || !all(c("variant_id", "method", "coverage", "cs") %in%
-                                       names(top_loci_long))) {
-    return(out)
-  }
-  coverage_columns <- c("0.95" = "cs_coverage_0.95",
-                        "0.7" = "cs_coverage_0.7",
-                        "0.5" = "cs_coverage_0.5")
-  rows <- lapply(seq_len(nrow(out)), function(i) {
-    row <- out[i, , drop = FALSE]
-    memberships <- lapply(names(coverage_columns), function(coverage) {
-      hits <- top_loci_long[
-        as.character(top_loci_long$variant_id) == row$variant_ID[[1]] &
-          as.character(top_loci_long$method) == primary_method &
-          abs(as.numeric(top_loci_long$coverage) - as.numeric(coverage)) < 1e-8,
-        , drop = FALSE
-      ]
-      unique(as.integer(hits$cs[!is.na(hits$cs)]))
-    })
-    names(memberships) <- coverage_columns
-    n_membership <- max(1L, vapply(memberships, length, integer(1)))
-    do.call(rbind, lapply(seq_len(n_membership), function(j) {
-      expanded <- row
-      for (column in names(memberships)) {
-        values <- memberships[[column]]
-        if (length(values) > 0) expanded[[column]] <- values[[min(j, length(values))]]
-      }
-      expanded
-    }))
-  })
-  out <- do.call(rbind, rows)
-  rownames(out) <- NULL
-  out
-}
-
 trim_finemapping_fit <- function(fit, effect_idx, method, cs_tables) {
   alpha <- .as_effect_matrix(fit$alpha)
   lbf_variable <- .as_lbf_matrix(fit)
@@ -728,29 +546,20 @@ trim_finemapping_fit <- function(fit, effect_idx, method, cs_tables) {
 #' @param post Output from \code{\link{postprocess_finemapping_fits}}.
 #' @param primary_method Method whose result should populate root-level fields.
 #' @return A list with root-level fields including \code{variant_names},
-#'   \code{susie_result_trimmed}, \code{top_loci_long}, \code{top_loci},
-#'   and the exported table \code{top_loci_export}.
+#'   \code{susie_result_trimmed}, \code{top_loci_long}, and \code{top_loci}.
 #' @export
 format_finemapping_output <- function(post, primary_method) {
   method_post <- post$finemapping_results[[primary_method]]
   if (is.null(method_post)) {
     stop("primary_method was not found in finemapping_results: ", primary_method)
   }
-  top_loci_export <- build_top_loci_export(
-    top_loci = post$top_loci,
-    top_loci_long = post$top_loci_long,
-    primary_method = primary_method,
-    result_trimmed = method_post$result_trimmed,
-    variant_names = method_post$variant_names
-  )
   keep_names <- setdiff(names(method_post), c("result_trimmed", "top_loci_long"))
   c(
     method_post[keep_names],
     list(
       susie_result_trimmed = method_post$result_trimmed,
       top_loci_long = post$top_loci_long,
-      top_loci = post$top_loci,
-      top_loci_export = top_loci_export
+      top_loci = post$top_loci
     )
   )
 }
diff --git a/tests/testthat/test_susie_wrapper.R b/tests/testthat/test_susie_wrapper.R
index a671c3a8..20747008 100644
--- a/tests/testthat/test_susie_wrapper.R
+++ b/tests/testthat/test_susie_wrapper.R
@@ -765,103 +765,6 @@ test_that("format_finemapping_output does not duplicate top loci variants", {
   expect_equal(unique(out$top_loci$variant_id), paste0("v", 1:4))
 })
 
-test_that("format_finemapping_output adds top_loci_export without changing wrapper tables", {
-  variant_id <- "chr1:101:A:G"
-  result_trimmed <- list(
-    pip = setNames(0.8, variant_id),
-    alpha = matrix(1, nrow = 1, ncol = 1,
-                   dimnames = list("L1", variant_id)),
-    mu = matrix(0.8, nrow = 1, ncol = 1,
-                dimnames = list("L1", variant_id)),
-    mu2 = matrix(0.81, nrow = 1, ncol = 1,
-                 dimnames = list("L1", variant_id)),
-    V = 1,
-    X_column_scale_factors = 1,
-    X_scalar = 1,
-    y_scalar = 1
-  )
-  class(result_trimmed) <- "susie"
-  top_loci <- data.frame(
-    variant_id = variant_id,
-    betahat = 0.2,
-    sebetahat = 0.1,
-    pip_susie = 0.8,
-    CS_95_susie = 1L,
-    CS_70_susie = 0L,
-    CS_50_susie = 0L,
-    stringsAsFactors = FALSE
-  )
-  post <- list(
-    finemapping_results = list(susie = list(
-      variant_names = variant_id,
-      result_trimmed = result_trimmed,
-      top_loci_long = NULL
-    )),
-    top_loci_long = NULL,
-    top_loci = top_loci
-  )
-
-  out <- format_finemapping_output(post, "susie")
-
-  expect_true("top_loci_export" %in% names(out))
-  expect_equal(out$top_loci$betahat, 0.2)
-  expect_equal(out$top_loci_export$conditional_effect, 0.8)
-  expect_equal(out$top_loci_export$conditional_effect_se, sqrt(0.17),
-               tolerance = 1e-8)
-  expect_equal(out$top_loci_export$beta, 0.2)
-  expect_equal(out$top_loci_export$se, 0.1)
-})
-
-test_that("build_top_loci_export expands overlapping CS membership and carries n and af", {
-  variant_id <- "chr1:101:A:G"
-  result_trimmed <- list(
-    pip = setNames(0.8, variant_id),
-    alpha = matrix(1, nrow = 1, ncol = 1,
-                   dimnames = list("L1", variant_id)),
-    mu = matrix(0.8, nrow = 1, ncol = 1,
-                dimnames = list("L1", variant_id)),
-    mu2 = matrix(0.81, nrow = 1, ncol = 1,
-                 dimnames = list("L1", variant_id)),
-    V = 1,
-    X_column_scale_factors = 1
-  )
-  top_loci <- data.frame(
-    variant_id = variant_id,
-    betahat = 0.2,
-    sebetahat = 0.1,
-    pip_susie = 0.8,
-    CS_95_susie = 1L,
-    stringsAsFactors = FALSE
-  )
-  top_loci_long <- data.frame(
-    variant_id = c(variant_id, variant_id),
-    method = c("susie", "susie"),
-    coverage = c(0.95, 0.95),
-    cs = c(1L, 2L),
-    stringsAsFactors = FALSE
-  )
-  af <- setNames(0.35, variant_id)
-
-  out <- pecotmr:::build_top_loci_export(
-    top_loci = top_loci,
-    top_loci_long = top_loci_long,
-    primary_method = "susie",
-    result_trimmed = result_trimmed,
-    variant_names = variant_id,
-    gene_id = "GENE1",
-    event_id = "EVENT1",
-    n = 42,
-    af = af
-  )
-
-  expect_equal(nrow(out), 2L)
-  expect_equal(out[["cs_coverage_0.95"]], c(1L, 2L))
-  expect_equal(out$gene_ID, c("GENE1", "GENE1"))
-  expect_equal(out$event_ID, c("EVENT1", "EVENT1"))
-  expect_equal(out$n, c(42, 42))
-  expect_equal(out$af, c(0.35, 0.35))
-})
-
 .make_univariate_data <- function(seed = 42, n = 300, p = 50,
                                   effect_idx = integer(0), effect_size = NULL) {
   set.seed(seed)

From a04bc21840dd21d5bb87c45a9dd74c0fc4a3b547 Mon Sep 17 00:00:00 2001
From: xuewei cao <36172337+xueweic@users.noreply.github.com>
Date: Sat, 30 May 2026 02:54:13 -0500
Subject: [PATCH 03/14] Unify compact top-loci export

Unify compact top-loci export at one wrapper boundary
Major:
- build_top_loci_long now emits the full annotated long: posterior
  conditional_effect / conditional_effect_se, cs_purity, per-fit n /
  variant_number / gene_id, and caller-supplied region / event_ID.
- Adds build_top_loci_export(long): pure projection of the annotated
  long to the fixed 21-column compact export schema; stops on missing
  required columns rather than silently filling NA.
Minor:
- build_top_loci_long gains 3 default-NULL params (data_x, data_y,
  other_quantities); existing callers omitting them are unchanged.
- .postprocess_finemapping_fit_common passes those through unchanged.
- other_quantities reserves two subfield names for the unified export:
  region, condition_id (alongside the existing dropped_samples use).
- .empty_top_loci_long extended with the new columns so empty and
  populated long share one schema.
- Adds focused unit tests covering the new columns, the per-fit
  constants, the export helper schema, and the missing-column error.
Loader, univariate_analysis_pipeline, and format_finemapping_output
are unchanged. transMap-side adoption is out of scope of this commit.
---
 R/susie_wrapper.R                   | 215 ++++++++++++++++++++++++++--
 tests/testthat/test_susie_wrapper.R | 184 ++++++++++++++++++++++++
 2 files changed, 390 insertions(+), 9 deletions(-)

diff --git a/R/susie_wrapper.R b/R/susie_wrapper.R
index bb1e2a8f..6b492b30 100644
--- a/R/susie_wrapper.R
+++ b/R/susie_wrapper.R
@@ -263,7 +263,8 @@ postprocess_finemapping_fit.susiF <- function(fit, method = "fsusie", ...) {
   )
   top_loci_long <- build_top_loci_long(
     fit, cs_tables, variant_names = variant_names, sumstats = sumstats,
-    maf = maf, method = method, signal_cutoff = signal_cutoff
+    maf = maf, method = method, signal_cutoff = signal_cutoff,
+    data_x = data_x, data_y = data_y, other_quantities = other_quantities
   )
 
   trimmed <- trim_finemapping_fit(fit, effect_idx, method, cs_tables)
@@ -408,9 +409,36 @@ compute_cs_table <- function(fit, data_x, coverage, cs_input = c("X", "Xcorr", "
 }
 
 build_top_loci_long <- function(fit, cs_tables, variant_names, sumstats = NULL,
-                                maf = NULL, method, signal_cutoff = 0.1) {
+                                maf = NULL, method, signal_cutoff = 0.1,
+                                data_x = NULL, data_y = NULL,
+                                other_quantities = NULL) {
   if (length(cs_tables) == 0) return(.empty_top_loci_long())
   coverage_values <- attr(cs_tables, "coverage")
+
+  # Per-fit constants (broadcast to every row of this fit's slice).
+  data_y_mat <- if (!is.null(data_y)) as.matrix(data_y) else NULL
+  fit_n              <- if (is.null(data_y_mat)) NA_integer_ else nrow(data_y_mat)
+  fit_variant_number <- if (!is.null(data_x)) ncol(data_x) else NA_integer_
+  fit_gene_id        <- if (!is.null(data_y_mat) && !is.null(colnames(data_y_mat))) {
+    colnames(data_y_mat)[1]
+  } else NA_character_
+  fit_region   <- other_quantities$region
+  fit_event_id <- if (!is.null(other_quantities$condition_id) &&
+                      !is.na(fit_gene_id) && nzchar(fit_gene_id)) {
+    paste(other_quantities$condition_id, fit_gene_id, sep = "_")
+  } else NULL
+
+  # Per-variant posterior effect and SE, computed once for all variants.
+  alpha <- as.matrix(fit$alpha)
+  mu    <- if (!is.null(fit$mu))  as.matrix(fit$mu)  else NULL
+  mu2   <- if (!is.null(fit$mu2)) as.matrix(fit$mu2) else NULL
+  posterior_effect <- if (!is.null(mu) && all(dim(alpha) == dim(mu))) {
+    colSums(alpha * mu)
+  } else rep(NA_real_, length(variant_names))
+  posterior_effect_se <- if (!is.null(mu2) && all(dim(alpha) == dim(mu2))) {
+    sqrt(pmax(colSums(alpha * mu2) - posterior_effect^2, 0))
+  } else rep(NA_real_, length(variant_names))
+
   rows <- lapply(seq_along(cs_tables), function(i) {
     cs_table <- cs_tables[[i]]
     cov <- coverage_values[[i]]
@@ -419,14 +447,47 @@ build_top_loci_long <- function(fit, cs_tables, variant_names, sumstats = NULL,
     if (is.null(cs_info) || nrow(cs_info) == 0) return(NULL)
     idx <- cs_info$variant_idx
     optional_cols <- .top_loci_optional_columns(idx, sumstats, maf)
+
+    # cs_purity: prefer susieR's sets$purity$min.abs.corr; fall back to
+    # cs_corr when purity is unavailable; PIP-only retained rows get 0.
+    sets_purity <- cs_table$sets$purity
+    cs_purity_per_cs <- if (!is.null(sets_purity) &&
+                            "min.abs.corr" %in% names(sets_purity)) {
+      as.numeric(sets_purity$min.abs.corr)
+    } else if (!is.null(cs_table$cs_corr)) {
+      vapply(seq_along(cs_table$cs_corr), function(j) {
+        m <- cs_table$cs_corr[[j]]
+        if (is.null(m)) return(NA_real_)
+        if (!is.matrix(m) || nrow(m) <= 1) return(1)
+        min(abs(m[upper.tri(m)]))
+      }, numeric(1))
+    } else {
+      rep(NA_real_, length(cs_table$sets$cs))
+    }
+    cs_purity_per_row <- vapply(cs_info$cs_idx, function(cs_i) {
+      if (is.na(cs_i) || cs_i == 0L) return(0)
+      if (cs_i > length(cs_purity_per_cs)) return(NA_real_)
+      cs_purity_per_cs[cs_i]
+    }, numeric(1))
+
     base <- data.frame(
-      variant_id = variant_names[idx],
-      method = method,
-      coverage = cov,
-      cs = as.integer(cs_info$cs_idx),
-      pip = as.numeric(fit$pip[idx]),
-      stringsAsFactors = FALSE
+      variant_id            = variant_names[idx],
+      method                = method,
+      coverage              = cov,
+      cs                    = as.integer(cs_info$cs_idx),
+      pip                   = as.numeric(fit$pip[idx]),
+      conditional_effect    = posterior_effect[idx],
+      conditional_effect_se = posterior_effect_se[idx],
+      cs_purity             = cs_purity_per_row,
+      stringsAsFactors      = FALSE
     )
+    # Per-fit constants broadcast to every row.
+    base$n              <- fit_n
+    base$variant_number <- fit_variant_number
+    base$gene_id        <- fit_gene_id
+    if (!is.null(fit_region))   base$region   <- fit_region
+    if (!is.null(fit_event_id)) base$event_ID <- fit_event_id
+
     if (ncol(optional_cols) > 0) cbind(base, optional_cols) else base
   })
   out <- bind_rows(rows)
@@ -436,7 +497,12 @@ build_top_loci_long <- function(fit, cs_tables, variant_names, sumstats = NULL,
 .empty_top_loci_long <- function() {
   data.frame(
     variant_id = character(), method = character(), coverage = numeric(),
-    cs = integer(), pip = numeric(), stringsAsFactors = FALSE
+    cs = integer(), pip = numeric(),
+    conditional_effect = numeric(), conditional_effect_se = numeric(),
+    cs_purity = numeric(),
+    n = integer(), variant_number = integer(),
+    gene_id = character(),
+    stringsAsFactors = FALSE
   )
 }
 
@@ -491,6 +557,137 @@ build_top_loci_wide <- function(top_loci_long, posts) {
   out
 }
 
+#' Build the unified compact top-loci export table.
+#'
+#' Projects an annotated \code{top_loci_long} (as produced by
+#' \code{postprocess_finemapping_fits()} when called with \code{data_x},
+#' \code{data_y}, and an \code{other_quantities} list containing
+#' \code{region} and \code{condition_id}) into the fixed-order compact
+#' export schema used by the unified fine-mapping output.
+#'
+#' The output column order is exactly: \code{#chr}, \code{start}, \code{end},
+#' \code{a1}, \code{a2}, \code{variant_ID}, \code{gene_ID}, \code{event_ID},
+#' \code{cs_coverage_0.95}, \code{cs_coverage_0.7}, \code{cs_coverage_0.5},
+#' \code{cs_purity}, \code{PIP}, \code{conditional_effect},
+#' \code{conditional_effect_se}, \code{analysis_region},
+#' \code{analysis_variants_number}, \code{beta}, \code{se}, \code{n},
+#' \code{maf}.
+#'
+#' @param long An annotated \code{top_loci_long} data frame. Must contain
+#'   the columns \code{variant_id}, \code{pip}, \code{coverage}, \code{cs},
+#'   \code{conditional_effect}, \code{conditional_effect_se},
+#'   \code{cs_purity}, \code{gene_id}, \code{n}, \code{variant_number},
+#'   \code{region}, \code{event_ID}, \code{betahat}, \code{sebetahat},
+#'   \code{maf}. Missing any required column raises an explicit error
+#'   rather than silently filling \code{NA}.
+#' @return A data frame with the fixed compact-export schema.
+#' @export
+build_top_loci_export <- function(long) {
+  required <- c("variant_id", "pip", "coverage", "cs",
+                "conditional_effect", "conditional_effect_se", "cs_purity",
+                "gene_id", "n", "variant_number", "region", "event_ID",
+                "betahat", "sebetahat", "maf")
+  if (is.null(long) || !is.data.frame(long)) {
+    stop("build_top_loci_export: `long` must be a data frame.")
+  }
+  if (nrow(long) == 0) return(.empty_top_loci_export())
+  missing_cols <- setdiff(required, names(long))
+  if (length(missing_cols) > 0) {
+    stop("build_top_loci_export: `long` is missing required columns: ",
+         paste(missing_cols, collapse = ", "))
+  }
+
+  parsed <- tryCatch(
+    parse_variant_id(long$variant_id),
+    error = function(e) {
+      stop("build_top_loci_export: parse_variant_id failed: ",
+           conditionMessage(e))
+    }
+  )
+  if (is.null(parsed) || nrow(parsed) != nrow(long)) {
+    stop("build_top_loci_export: parse_variant_id did not return one row ",
+         "per input row.")
+  }
+
+  # Build a key per (variant_id, gene_id, cs) — one row per CS membership
+  # at the export grain. PIP-only retained variants have cs = 0.
+  key_grid <- unique(long[, c("variant_id", "gene_id", "cs"), drop = FALSE])
+  rownames(key_grid) <- NULL
+
+  coverage_targets <- c(0.95, 0.7, 0.5)
+  cs_coverage_cols <- paste0("cs_coverage_", coverage_targets)
+
+  pick_first <- function(col, default = NA) {
+    vapply(seq_len(nrow(key_grid)), function(i) {
+      sel <- long$variant_id == key_grid$variant_id[i] &
+             long$gene_id    == key_grid$gene_id[i] &
+             long$cs         == key_grid$cs[i]
+      hit <- long[[col]][sel]
+      if (length(hit) == 0) default else hit[[1]]
+    }, default)
+  }
+
+  cs_coverage_mat <- vapply(seq_len(nrow(key_grid)), function(i) {
+    vapply(coverage_targets, function(cov) {
+      sel <- long$variant_id == key_grid$variant_id[i] &
+             long$gene_id    == key_grid$gene_id[i] &
+             long$cs         == key_grid$cs[i] &
+             long$coverage   == cov
+      if (any(sel)) as.integer(long$cs[sel][[1]]) else 0L
+    }, integer(1))
+  }, integer(length(coverage_targets)))
+  cs_coverage_mat <- if (is.null(dim(cs_coverage_mat))) {
+    matrix(cs_coverage_mat, nrow = length(coverage_targets))
+  } else cs_coverage_mat
+  cs_coverage_mat <- t(cs_coverage_mat)
+  colnames(cs_coverage_mat) <- cs_coverage_cols
+
+  # Resolve per-row variant coordinates using a lookup into parsed.
+  variant_first_idx <- match(key_grid$variant_id, long$variant_id)
+
+  out <- data.frame(
+    "#chr"                   = parsed$chrom[variant_first_idx],
+    start                    = parsed$pos[variant_first_idx] - 1L,
+    end                      = parsed$pos[variant_first_idx],
+    a1                       = parsed$A1[variant_first_idx],
+    a2                       = parsed$A2[variant_first_idx],
+    variant_ID               = key_grid$variant_id,
+    gene_ID                  = key_grid$gene_id,
+    event_ID                 = pick_first("event_ID", NA_character_),
+    "cs_coverage_0.95"       = cs_coverage_mat[, "cs_coverage_0.95"],
+    "cs_coverage_0.7"        = cs_coverage_mat[, "cs_coverage_0.7"],
+    "cs_coverage_0.5"        = cs_coverage_mat[, "cs_coverage_0.5"],
+    cs_purity                = pick_first("cs_purity", NA_real_),
+    PIP                      = pick_first("pip", NA_real_),
+    conditional_effect       = pick_first("conditional_effect", NA_real_),
+    conditional_effect_se    = pick_first("conditional_effect_se", NA_real_),
+    analysis_region          = pick_first("region", NA_character_),
+    analysis_variants_number = pick_first("variant_number", NA_integer_),
+    beta                     = pick_first("betahat", NA_real_),
+    se                       = pick_first("sebetahat", NA_real_),
+    n                        = pick_first("n", NA_integer_),
+    maf                      = pick_first("maf", NA_real_),
+    stringsAsFactors         = FALSE,
+    check.names              = FALSE
+  )
+  rownames(out) <- NULL
+  out
+}
+
+.empty_top_loci_export <- function() {
+  data.frame(
+    "#chr" = integer(), start = integer(), end = integer(),
+    a1 = character(), a2 = character(), variant_ID = character(),
+    gene_ID = character(), event_ID = character(),
+    "cs_coverage_0.95" = integer(), "cs_coverage_0.7" = integer(),
+    "cs_coverage_0.5" = integer(), cs_purity = numeric(), PIP = numeric(),
+    conditional_effect = numeric(), conditional_effect_se = numeric(),
+    analysis_region = character(), analysis_variants_number = integer(),
+    beta = numeric(), se = numeric(), n = integer(), maf = numeric(),
+    stringsAsFactors = FALSE, check.names = FALSE
+  )
+}
+
 trim_finemapping_fit <- function(fit, effect_idx, method, cs_tables) {
   alpha <- .as_effect_matrix(fit$alpha)
   lbf_variable <- .as_lbf_matrix(fit)
diff --git a/tests/testthat/test_susie_wrapper.R b/tests/testthat/test_susie_wrapper.R
index 20747008..2b4a2c09 100644
--- a/tests/testthat/test_susie_wrapper.R
+++ b/tests/testthat/test_susie_wrapper.R
@@ -910,3 +910,187 @@ test_that("top_loci has symmetric CS_<cov>_<method> columns even for methods wit
     expect_true(all(post$top_loci[[col]] == 0L), info = col)
   }
 })
+
+# ============================================================================
+# Unified top-loci annotated long + build_top_loci_export
+# ============================================================================
+
+# Helper: build a minimal fake fit + cs_tables that build_top_loci_long accepts.
+.make_fake_inputs_for_annotated_long <- function(variant_ids,
+                                                 cs_membership,
+                                                 cs_purity_value = 0.85,
+                                                 n_samples = 100,
+                                                 n_variants = NULL,
+                                                 pip = NULL) {
+  p <- length(variant_ids)
+  if (is.null(n_variants)) n_variants <- p
+  if (is.null(pip)) pip <- seq(0.6, 0.9, length.out = p)
+  alpha <- matrix(0, nrow = 1, ncol = p,
+                  dimnames = list(NULL, variant_ids))
+  alpha[1, ] <- pip / sum(pip)
+  mu <- matrix(0.5, nrow = 1, ncol = p,
+               dimnames = list(NULL, variant_ids))
+  mu2 <- mu^2 + 0.1
+  fit <- list(pip = setNames(pip, variant_ids), alpha = alpha,
+              mu = mu, mu2 = mu2)
+  purity_df <- data.frame(min.abs.corr = cs_purity_value,
+                          mean.abs.corr = cs_purity_value,
+                          median.abs.corr = cs_purity_value)
+  cs_table <- list(
+    sets = list(cs = list(L1 = cs_membership),
+                cs_index = 1L,
+                requested_coverage = 0.95,
+                purity = purity_df),
+    cs_corr = list(matrix(c(1, cs_purity_value, cs_purity_value, 1),
+                          nrow = 2)),
+    pip = fit$pip
+  )
+  cs_tables <- list(cs_table)
+  attr(cs_tables, "coverage") <- 0.95
+  X <- matrix(0, nrow = n_samples, ncol = n_variants)
+  rownames(X) <- paste0("sample", seq_len(n_samples))
+  if (n_variants == length(variant_ids)) colnames(X) <- variant_ids
+  Y <- matrix(0, nrow = n_samples, ncol = 1,
+              dimnames = list(paste0("sample", seq_len(n_samples)),
+                              "ENSG00000179403"))
+  list(fit = fit, cs_tables = cs_tables, variant_names = variant_ids,
+       data_x = X, data_y = Y)
+}
+
+test_that("build_top_loci_long emits annotated columns from fit + cs_tables", {
+  variant_ids <- c("chr1:100:A:G", "chr1:200:C:T")
+  inp <- .make_fake_inputs_for_annotated_long(variant_ids,
+                                              cs_membership = c(1L, 2L))
+  long <- pecotmr:::build_top_loci_long(
+    fit = inp$fit, cs_tables = inp$cs_tables,
+    variant_names = inp$variant_names,
+    sumstats = list(betahat = c(0.2, -0.1),
+                    sebetahat = c(0.05, 0.04),
+                    z = c(4, -2.5)),
+    maf = c(0.10, 0.25), method = "susie", signal_cutoff = 0.05
+  )
+  expect_true(all(c("conditional_effect", "conditional_effect_se",
+                    "cs_purity") %in% names(long)))
+  expect_equal(length(unique(long$cs_purity)), 1L)
+  expect_equal(round(unique(long$cs_purity), 3), 0.85)
+  expect_true(all(is.finite(long$conditional_effect)))
+  expect_true(all(long$conditional_effect_se >= 0))
+})
+
+test_that("build_top_loci_long writes per-fit constants from data_x/data_y/other_quantities", {
+  variant_ids <- c("chr1:100:A:G", "chr1:200:C:T")
+  inp <- .make_fake_inputs_for_annotated_long(variant_ids,
+                                              cs_membership = c(1L, 2L),
+                                              n_samples = 419,
+                                              n_variants = 11332)
+  other_q <- list(
+    dropped_samples = list(X = character(), y = character()),
+    region          = "chr10:10823338-14348298",
+    condition_id    = "Ast_DeJager_eQTL"
+  )
+  long <- pecotmr:::build_top_loci_long(
+    fit = inp$fit, cs_tables = inp$cs_tables,
+    variant_names = inp$variant_names,
+    sumstats = list(betahat = c(0.2, -0.1),
+                    sebetahat = c(0.05, 0.04),
+                    z = c(4, -2.5)),
+    maf = c(0.10, 0.25), method = "susie", signal_cutoff = 0.05,
+    data_x = inp$data_x, data_y = inp$data_y, other_quantities = other_q
+  )
+  expect_equal(unique(long$n), 419L)
+  expect_equal(unique(long$variant_number), 11332L)
+  expect_equal(unique(long$gene_id), "ENSG00000179403")
+  expect_equal(unique(long$region), "chr10:10823338-14348298")
+  expect_equal(unique(long$event_ID), "Ast_DeJager_eQTL_ENSG00000179403")
+})
+
+test_that("build_top_loci_long omits region/event_ID columns when caller does not supply them", {
+  variant_ids <- c("chr1:100:A:G")
+  inp <- .make_fake_inputs_for_annotated_long(variant_ids,
+                                              cs_membership = 1L)
+  long <- pecotmr:::build_top_loci_long(
+    fit = inp$fit, cs_tables = inp$cs_tables,
+    variant_names = inp$variant_names,
+    sumstats = list(betahat = 0.2, sebetahat = 0.05, z = 4),
+    maf = 0.10, method = "susie", signal_cutoff = 0.05,
+    data_x = inp$data_x, data_y = inp$data_y, other_quantities = NULL
+  )
+  expect_false("region" %in% names(long))
+  expect_false("event_ID" %in% names(long))
+  expect_true("n" %in% names(long))
+  expect_true("variant_number" %in% names(long))
+  expect_true("gene_id" %in% names(long))
+})
+
+test_that("build_top_loci_long stays backward-compatible when data_x/data_y/other_quantities are omitted", {
+  variant_ids <- c("chr1:100:A:G", "chr1:200:C:T")
+  inp <- .make_fake_inputs_for_annotated_long(variant_ids,
+                                              cs_membership = c(1L, 2L))
+  long <- pecotmr:::build_top_loci_long(
+    fit = inp$fit, cs_tables = inp$cs_tables,
+    variant_names = inp$variant_names,
+    sumstats = list(betahat = c(0.2, -0.1),
+                    sebetahat = c(0.05, 0.04),
+                    z = c(4, -2.5)),
+    maf = c(0.10, 0.25), method = "susie", signal_cutoff = 0.05
+  )
+  # n / variant_number / gene_id columns exist but are NA because no
+  # data_x / data_y were supplied; region / event_ID are omitted entirely.
+  expect_true(all(is.na(long$n)))
+  expect_true(all(is.na(long$variant_number)))
+  expect_true(all(is.na(long$gene_id)))
+  expect_false("region" %in% names(long))
+  expect_false("event_ID" %in% names(long))
+})
+
+test_that("build_top_loci_export errors when required columns are missing", {
+  bad <- data.frame(variant_id = "chr1:100:A:G", pip = 0.9,
+                    coverage = 0.95, cs = 1L,
+                    stringsAsFactors = FALSE)
+  expect_error(build_top_loci_export(bad),
+               "missing required columns")
+})
+
+test_that("build_top_loci_export returns the fixed 21-column schema on an empty long", {
+  empty <- pecotmr:::.empty_top_loci_long()
+  out <- build_top_loci_export(empty)
+  expected <- c("#chr", "start", "end", "a1", "a2", "variant_ID", "gene_ID",
+                "event_ID", "cs_coverage_0.95", "cs_coverage_0.7",
+                "cs_coverage_0.5", "cs_purity", "PIP", "conditional_effect",
+                "conditional_effect_se", "analysis_region",
+                "analysis_variants_number", "beta", "se", "n", "maf")
+  expect_equal(names(out), expected)
+  expect_equal(nrow(out), 0L)
+})
+
+test_that("build_top_loci_export projects an annotated long into the fixed compact schema", {
+  variant_ids <- c("chr1:100:A:G", "chr1:200:C:T")
+  inp <- .make_fake_inputs_for_annotated_long(variant_ids,
+                                              cs_membership = c(1L, 2L),
+                                              n_samples = 419,
+                                              n_variants = 11332)
+  other_q <- list(region = "chr10:10823338-14348298",
+                  condition_id = "Ast_DeJager_eQTL")
+  long <- pecotmr:::build_top_loci_long(
+    fit = inp$fit, cs_tables = inp$cs_tables,
+    variant_names = inp$variant_names,
+    sumstats = list(betahat = c(0.2, -0.1),
+                    sebetahat = c(0.05, 0.04),
+                    z = c(4, -2.5)),
+    maf = c(0.10, 0.25), method = "susie", signal_cutoff = 0.05,
+    data_x = inp$data_x, data_y = inp$data_y, other_quantities = other_q
+  )
+  out <- build_top_loci_export(long)
+  expected_cols <- c("#chr", "start", "end", "a1", "a2", "variant_ID",
+                     "gene_ID", "event_ID", "cs_coverage_0.95",
+                     "cs_coverage_0.7", "cs_coverage_0.5", "cs_purity",
+                     "PIP", "conditional_effect", "conditional_effect_se",
+                     "analysis_region", "analysis_variants_number",
+                     "beta", "se", "n", "maf")
+  expect_equal(names(out), expected_cols)
+  expect_equal(out$gene_ID[[1]], "ENSG00000179403")
+  expect_equal(out$event_ID[[1]], "Ast_DeJager_eQTL_ENSG00000179403")
+  expect_equal(out$analysis_region[[1]], "chr10:10823338-14348298")
+  expect_equal(out$analysis_variants_number[[1]], 11332L)
+  expect_equal(out$n[[1]], 419L)
+})

From 9b39c36abbf8b5eafaa72fe5cd94e23dc150cdf1 Mon Sep 17 00:00:00 2001
From: xueweic <xueweic@users.noreply.github.com>
Date: Sat, 30 May 2026 07:59:12 +0000
Subject: [PATCH 04/14] Update documentation

---
 NAMESPACE                           |  1 +
 man/build_top_loci_export.Rd        | 36 +++++++++++++++++++++++++++++
 man/univariate_analysis_pipeline.Rd |  9 ++++++--
 3 files changed, 44 insertions(+), 2 deletions(-)
 create mode 100644 man/build_top_loci_export.Rd

diff --git a/NAMESPACE b/NAMESPACE
index cd43db4f..417b054e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -25,6 +25,7 @@ export(bayes_c_weights)
 export(bayes_l_weights)
 export(bayes_n_weights)
 export(bayes_r_weights)
+export(build_top_loci_export)
 export(check_ld)
 export(clean_context_names)
 export(coloc_post_processor)
diff --git a/man/build_top_loci_export.Rd b/man/build_top_loci_export.Rd
new file mode 100644
index 00000000..171e52c9
--- /dev/null
+++ b/man/build_top_loci_export.Rd
@@ -0,0 +1,36 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/susie_wrapper.R
+\name{build_top_loci_export}
+\alias{build_top_loci_export}
+\title{Build the unified compact top-loci export table.}
+\usage{
+build_top_loci_export(long)
+}
+\arguments{
+\item{long}{An annotated \code{top_loci_long} data frame. Must contain
+the columns \code{variant_id}, \code{pip}, \code{coverage}, \code{cs},
+\code{conditional_effect}, \code{conditional_effect_se},
+\code{cs_purity}, \code{gene_id}, \code{n}, \code{variant_number},
+\code{region}, \code{event_ID}, \code{betahat}, \code{sebetahat},
+\code{maf}. Missing any required column raises an explicit error
+rather than silently filling \code{NA}.}
+}
+\value{
+A data frame with the fixed compact-export schema.
+}
+\description{
+Projects an annotated \code{top_loci_long} (as produced by
+\code{postprocess_finemapping_fits()} when called with \code{data_x},
+\code{data_y}, and an \code{other_quantities} list containing
+\code{region} and \code{condition_id}) into the fixed-order compact
+export schema used by the unified fine-mapping output.
+}
+\details{
+The output column order is exactly: \code{#chr}, \code{start}, \code{end},
+\code{a1}, \code{a2}, \code{variant_ID}, \code{gene_ID}, \code{event_ID},
+\code{cs_coverage_0.95}, \code{cs_coverage_0.7}, \code{cs_coverage_0.5},
+\code{cs_purity}, \code{PIP}, \code{conditional_effect},
+\code{conditional_effect_se}, \code{analysis_region},
+\code{analysis_variants_number}, \code{beta}, \code{se}, \code{n},
+\code{maf}.
+}
diff --git a/man/univariate_analysis_pipeline.Rd b/man/univariate_analysis_pipeline.Rd
index c6960862..2f54b4b5 100644
--- a/man/univariate_analysis_pipeline.Rd
+++ b/man/univariate_analysis_pipeline.Rd
@@ -23,6 +23,7 @@ univariate_analysis_pipeline(
   coverage = c(0.95, 0.7, 0.5),
   min_abs_corr = 0.8,
   finemapping_extra_opts = list(refine = TRUE),
+  fit_susie_inf = TRUE,
   twas_weights = TRUE,
   sample_partition = NULL,
   max_cv_variants = -1,
@@ -71,6 +72,10 @@ which is stricter than the susieR default of 0.5.}
 SuSiE-inf is always fitted with \code{refine = FALSE}; the ordinary SuSiE
 fit keeps these options and is initialized with \code{model_init}.}
 
+\item{fit_susie_inf}{Whether to fit SuSiE-inf before ordinary SuSiE. Default
+is TRUE for existing pipeline compatibility. If FALSE, ordinary SuSiE is
+fitted directly and SuSiE-inf fitted objects/results are not returned.}
+
 \item{twas_weights}{Whether to compute TWAS weights. Default is TRUE.}
 
 \item{sample_partition}{Optional data frame with Sample and Fold columns for cross-validation. Default is NULL.}
@@ -88,6 +93,6 @@ A list containing the univariate analysis results.
 }
 \description{
 This function performs univariate analysis for fine-mapping and Transcriptome-Wide Association Study (TWAS)
-with optional cross-validation. Fine-mapping fits SuSiE-inf first and then
-fits SuSiE initialized from the SuSiE-inf result.
+with optional cross-validation. By default, fine-mapping fits SuSiE-inf first
+and then fits SuSiE initialized from the SuSiE-inf result.
 }

From 622244a5b33e3ca599d2717a52464973b5a626dd Mon Sep 17 00:00:00 2001
From: xuewei cao <36172337+xueweic@users.noreply.github.com>
Date: Sat, 30 May 2026 10:02:27 -0500
Subject: [PATCH 05/14] unify top loci export

---
 R/susie_wrapper.R                   | 554 +++++++++++++-----------
 tests/testthat/test_susie_wrapper.R | 648 ++++++++++++++++------------
 2 files changed, 676 insertions(+), 526 deletions(-)

diff --git a/R/susie_wrapper.R b/R/susie_wrapper.R
index 6b492b30..8a731482 100644
--- a/R/susie_wrapper.R
+++ b/R/susie_wrapper.R
@@ -168,11 +168,13 @@ fit_susie_inf_then_susie <- function(X, y, args = list(),
 #' @param other_quantities Optional list carried into each method result.
 #' @param prior_eff_tol Tolerance for retaining effects by prior variance.
 #' @param min_abs_corr Minimum absolute correlation for credible-set purity.
-#' @return A list with \code{finemapping_results}, \code{top_loci_long}, and
-#'   \code{top_loci}. The long table is lossless, with one row per
-#'   variant-method-coverage-CS membership. The wide table stores one row per
-#'   variant, method-specific \code{pip_<method>} columns, method-specific
-#'   \code{CS_<coverage>_<method>} columns, and \code{model_source}.
+#' @return A list with \code{finemapping_results} (per-method post-processed
+#'   objects, each carrying a trimmed fit and method-specific intermediates)
+#'   and the single unified \code{top_loci} table in the fixed 22-column
+#'   shape (see \code{\link{build_top_loci}}). Per-method contributions are
+#'   produced by \code{build_top_loci()} once per method and row-bound into
+#'   this single \code{top_loci}. There is no separately exposed
+#'   \code{top_loci_long} or wide-format \code{top_loci}.
 #' @export
 postprocess_finemapping_fits <- function(fits, data_x, data_y = NULL,
                                          X_scalar = 1, y_scalar = 1,
@@ -188,6 +190,9 @@ postprocess_finemapping_fits <- function(fits, data_x, data_y = NULL,
     stop("fits must be a named list; names define method identity.")
   }
 
+  # One method for-loop: each method calls build_top_loci() once per fit; the
+  # per-method 22-column contributions are row-bound below into the single
+  # final `top_loci` table. There is no separately exposed long or wide table.
   posts <- lapply(names(fits), function(method) {
     fit <- .set_finemapping_fit_class(fits[[method]], method)
     postprocess_finemapping_fit(
@@ -200,16 +205,21 @@ postprocess_finemapping_fits <- function(fits, data_x, data_y = NULL,
   })
   names(posts) <- names(fits)
 
-  top_loci_long <- bind_rows(lapply(posts, function(x) x$top_loci_long))
+  per_method <- lapply(posts, function(x) x$top_loci)
+  per_method <- per_method[!vapply(per_method, is.null, logical(1))]
+  top_loci <- if (length(per_method) == 0L) {
+    .empty_top_loci()
+  } else {
+    do.call(rbind, per_method)
+  }
+  rownames(top_loci) <- NULL
   posts <- lapply(posts, function(x) {
-    x$top_loci_long <- NULL
+    x$top_loci <- NULL
     x
   })
-  top_loci <- build_top_loci_wide(top_loci_long, posts)
 
   list(
     finemapping_results = posts,
-    top_loci_long = if (nrow(top_loci_long) > 0) top_loci_long else NULL,
     top_loci = top_loci
   )
 }
@@ -261,7 +271,7 @@ postprocess_finemapping_fit.susiF <- function(fit, method = "fsusie", ...) {
     secondary_coverage = secondary_coverage, method = method,
     cs_input = cs_input, min_abs_corr = min_abs_corr
   )
-  top_loci_long <- build_top_loci_long(
+  top_loci <- build_top_loci(
     fit, cs_tables, variant_names = variant_names, sumstats = sumstats,
     maf = maf, method = method, signal_cutoff = signal_cutoff,
     data_x = data_x, data_y = data_y, other_quantities = other_quantities
@@ -269,15 +279,18 @@ postprocess_finemapping_fit.susiF <- function(fit, method = "fsusie", ...) {
 
   trimmed <- trim_finemapping_fit(fit, effect_idx, method, cs_tables)
 
-  # Build FineMappingResult S4 object
+  # Build FineMappingResult S4 object. The S4 contract (validity check,
+  # vcf_writer, getPIP, getCS) still expects `variant_id`, `pip`, and an
+  # integer `cs` column on the slot. To avoid rippling renames into
+  # AllClasses / AllMethods / vcf_writer for this change, we project the
+  # new 22-column `top_loci` into the legacy slot shape here, in
+  # susie_wrapper only. The wrapper-facing `top_loci` returned to callers
+  # is unchanged.
+  s4_top_loci <- .top_loci_for_s4_slot(top_loci)
   fm_result <- FineMappingResult(
     variant_names = variant_names,
     trimmed_fit = trimmed,
-    top_loci = if (is.null(top_loci_long) || nrow(top_loci_long) == 0) {
-      data.frame(variant_id = character(0), method = character(0))
-    } else {
-      top_loci_long
-    },
+    top_loci = s4_top_loci,
     method = method,
     sumstats = sumstats
   )
@@ -286,7 +299,7 @@ postprocess_finemapping_fit.susiF <- function(fit, method = "fsusie", ...) {
   res <- list(
     variant_names = variant_names,
     result_trimmed = trimmed,
-    top_loci_long = top_loci_long,
+    top_loci = top_loci,
     finemapping_result = fm_result
   )
   if (!is.null(sumstats)) res$sumstats <- sumstats
@@ -408,25 +421,94 @@ compute_cs_table <- function(fit, data_x, coverage, cs_input = c("X", "Xcorr", "
   out
 }
 
-build_top_loci_long <- function(fit, cs_tables, variant_names, sumstats = NULL,
-                                maf = NULL, method, signal_cutoff = 0.1,
-                                data_x = NULL, data_y = NULL,
-                                other_quantities = NULL) {
-  if (length(cs_tables) == 0) return(.empty_top_loci_long())
+#' Build the unified single top-loci table for one fit and one method.
+#'
+#' Produces the per-fit, per-method contribution to the unified \code{top_loci}
+#' table in the fixed 22-column shape. The outer
+#' \code{postprocess_finemapping_fits()} loop calls this once per method per fit
+#' and row-binds the results into the single final \code{top_loci} table that is
+#' exposed by \code{format_finemapping_output()}.
+#'
+#' This function replaces the previous \code{build_top_loci_long()},
+#' \code{build_top_loci_wide()}, and \code{build_top_loci_export()} trio.
+#' There is no separately exposed long or wide table.
+#'
+#' The output column order is exactly (22 columns):
+#' \code{#chr}, \code{start}, \code{end}, \code{a1}, \code{a2},
+#' \code{variant}, \code{gene}, \code{event},
+#' \code{n}, \code{maf}, \code{beta}, \code{se},
+#' \code{pip}, \code{posterior_effect_mean}, \code{posterior_effect_se},
+#' \code{cs_95}, \code{cs_70}, \code{cs_50}, \code{cs_95_purity},
+#' \code{method}, \code{grange_start}, \code{grange_end}.
+#'
+#' The \code{cs_95}, \code{cs_70}, \code{cs_50} columns are character strings of
+#' the form \code{"<method>_<cs_index>"} where each method numbers its credible
+#' sets independently starting at 1. Variants retained by the PIP cutoff but not
+#' assigned to any credible set at the given coverage use \code{"<method>_0"}.
+#' \code{cs_95_purity} is the 0.95-coverage credible-set purity for the row's
+#' \code{(method, cs_95)}; rows whose \code{cs_95} is \code{"<method>_0"} carry
+#' \code{cs_95_purity = 0}.
+#'
+#' Row uniqueness inside this function's output is one row per
+#' \code{(variant, gene, cs_membership)} at the given \code{method}. Overlapping
+#' credible-set membership for the same method produces one row per CS, so the
+#' overlapping-CS contract is preserved.
+#'
+#' @param fit A fitted SuSiE-family object (must expose \code{alpha},
+#'   \code{mu}, \code{mu2}, \code{pip}).
+#' @param cs_tables A list of CS tables (one per coverage), as produced by
+#'   \code{compute_cs_tables()}.
+#' @param variant_names Character vector of variant IDs in
+#'   \code{chr:pos:A2:A1} form, length equal to the number of variants in the
+#'   fit. Used to construct \code{variant}, \code{#chr}, \code{start},
+#'   \code{end}, \code{a1}, \code{a2}.
+#' @param sumstats Optional marginal-association summary statistics
+#'   (\code{betahat}, \code{sebetahat}) used to fill \code{beta} and \code{se}.
+#' @param maf Optional numeric vector of minor-allele frequencies.
+#' @param method Method name (e.g. \code{"susie"}, \code{"susie_inf"}). Used
+#'   to construct the per-method \code{"<method>_<cs_index>"} strings and the
+#'   \code{method} column. Required.
+#' @param signal_cutoff PIP cutoff for retaining PIP-only (non-CS) variants.
+#' @param data_x Optional regional genotype matrix; used only for sample-count
+#'   shape checks.
+#' @param data_y Optional regional phenotype matrix; \code{nrow(data_y)} fills
+#'   \code{n}, \code{colnames(data_y)[1]} fills \code{gene}.
+#' @param other_quantities Optional list with reserved subfields
+#'   \code{region} (e.g. \code{"chr1:100-200"}) and \code{condition_id}
+#'   (e.g. \code{"Ast_DeJager_eQTL"}); used to fill \code{grange_start},
+#'   \code{grange_end}, and to compose \code{event} as
+#'   \code{paste(condition_id, gene, sep = "_")}. Missing subfields are
+#'   filled with \code{NA} rather than dropped.
+#' @return A data frame in the fixed 22-column unified \code{top_loci} shape
+#'   for this one fit and one method. Returns an empty data frame with the
+#'   correct columns and dtypes if there is nothing to retain.
+#' @export
+build_top_loci <- function(fit, cs_tables, variant_names, sumstats = NULL,
+                           maf = NULL, method, signal_cutoff = 0.1,
+                           data_x = NULL, data_y = NULL,
+                           other_quantities = NULL) {
+  if (missing(method) || is.null(method) ||
+      length(method) != 1L || is.na(method) || !nzchar(method)) {
+    stop("build_top_loci: `method` is required (e.g. \"susie\", \"susie_inf\").")
+  }
+  if (length(cs_tables) == 0) return(.empty_top_loci())
   coverage_values <- attr(cs_tables, "coverage")
+  if (is.null(coverage_values)) coverage_values <- rep(NA_real_, length(cs_tables))
 
-  # Per-fit constants (broadcast to every row of this fit's slice).
+  # Per-fit constants.
   data_y_mat <- if (!is.null(data_y)) as.matrix(data_y) else NULL
-  fit_n              <- if (is.null(data_y_mat)) NA_integer_ else nrow(data_y_mat)
-  fit_variant_number <- if (!is.null(data_x)) ncol(data_x) else NA_integer_
-  fit_gene_id        <- if (!is.null(data_y_mat) && !is.null(colnames(data_y_mat))) {
+  fit_n    <- if (is.null(data_y_mat)) NA_integer_ else as.integer(nrow(data_y_mat))
+  fit_gene <- if (!is.null(data_y_mat) && !is.null(colnames(data_y_mat))) {
     colnames(data_y_mat)[1]
   } else NA_character_
-  fit_region   <- other_quantities$region
-  fit_event_id <- if (!is.null(other_quantities$condition_id) &&
-                      !is.na(fit_gene_id) && nzchar(fit_gene_id)) {
-    paste(other_quantities$condition_id, fit_gene_id, sep = "_")
-  } else NULL
+  fit_condition_id <- other_quantities$condition_id
+  fit_event <- if (!is.null(fit_condition_id) &&
+                   !is.na(fit_gene) && nzchar(fit_gene)) {
+    paste(fit_condition_id, fit_gene, sep = "_")
+  } else NA_character_
+  grange_se        <- .parse_grange(other_quantities$region)
+  fit_grange_start <- grange_se[["start"]]
+  fit_grange_end   <- grange_se[["end"]]
 
   # Per-variant posterior effect and SE, computed once for all variants.
   alpha <- as.matrix(fit$alpha)
@@ -439,255 +521,220 @@ build_top_loci_long <- function(fit, cs_tables, variant_names, sumstats = NULL,
     sqrt(pmax(colSums(alpha * mu2) - posterior_effect^2, 0))
   } else rep(NA_real_, length(variant_names))
 
-  rows <- lapply(seq_along(cs_tables), function(i) {
-    cs_table <- cs_tables[[i]]
-    cov <- coverage_values[[i]]
-    top_variants_idx <- get_top_variants_idx(cs_table, signal_cutoff)
-    cs_info <- get_cs_info(cs_table$sets$cs, top_variants_idx)
-    if (is.null(cs_info) || nrow(cs_info) == 0) return(NULL)
-    idx <- cs_info$variant_idx
-    optional_cols <- .top_loci_optional_columns(idx, sumstats, maf)
-
-    # cs_purity: prefer susieR's sets$purity$min.abs.corr; fall back to
-    # cs_corr when purity is unavailable; PIP-only retained rows get 0.
-    sets_purity <- cs_table$sets$purity
-    cs_purity_per_cs <- if (!is.null(sets_purity) &&
-                            "min.abs.corr" %in% names(sets_purity)) {
+  # Per-coverage credible-set purity vectors, indexed by 1-based CS index.
+  purity_per_cov <- lapply(cs_tables, function(ct) {
+    sets_purity <- ct$sets$purity
+    if (!is.null(sets_purity) &&
+        "min.abs.corr" %in% names(sets_purity)) {
       as.numeric(sets_purity$min.abs.corr)
-    } else if (!is.null(cs_table$cs_corr)) {
-      vapply(seq_along(cs_table$cs_corr), function(j) {
-        m <- cs_table$cs_corr[[j]]
+    } else if (!is.null(ct$cs_corr)) {
+      vapply(seq_along(ct$cs_corr), function(j) {
+        m <- ct$cs_corr[[j]]
         if (is.null(m)) return(NA_real_)
         if (!is.matrix(m) || nrow(m) <= 1) return(1)
         min(abs(m[upper.tri(m)]))
       }, numeric(1))
     } else {
-      rep(NA_real_, length(cs_table$sets$cs))
+      rep(NA_real_, length(ct$sets$cs))
     }
-    cs_purity_per_row <- vapply(cs_info$cs_idx, function(cs_i) {
-      if (is.na(cs_i) || cs_i == 0L) return(0)
-      if (cs_i > length(cs_purity_per_cs)) return(NA_real_)
-      cs_purity_per_cs[cs_i]
-    }, numeric(1))
-
-    base <- data.frame(
-      variant_id            = variant_names[idx],
-      method                = method,
-      coverage              = cov,
-      cs                    = as.integer(cs_info$cs_idx),
-      pip                   = as.numeric(fit$pip[idx]),
-      conditional_effect    = posterior_effect[idx],
-      conditional_effect_se = posterior_effect_se[idx],
-      cs_purity             = cs_purity_per_row,
-      stringsAsFactors      = FALSE
-    )
-    # Per-fit constants broadcast to every row.
-    base$n              <- fit_n
-    base$variant_number <- fit_variant_number
-    base$gene_id        <- fit_gene_id
-    if (!is.null(fit_region))   base$region   <- fit_region
-    if (!is.null(fit_event_id)) base$event_ID <- fit_event_id
-
-    if (ncol(optional_cols) > 0) cbind(base, optional_cols) else base
   })
-  out <- bind_rows(rows)
-  if (nrow(out) == 0) .empty_top_loci_long() else out
-}
 
-.empty_top_loci_long <- function() {
-  data.frame(
-    variant_id = character(), method = character(), coverage = numeric(),
-    cs = integer(), pip = numeric(),
-    conditional_effect = numeric(), conditional_effect_se = numeric(),
-    cs_purity = numeric(),
-    n = integer(), variant_number = integer(),
-    gene_id = character(),
-    stringsAsFactors = FALSE
-  )
-}
-
-.top_loci_optional_columns <- function(idx, sumstats = NULL, maf = NULL) {
-  optional_cols <- list(
-    betahat = if (!is.null(sumstats$betahat)) sumstats$betahat[idx] else NULL,
-    sebetahat = if (!is.null(sumstats$sebetahat)) sumstats$sebetahat[idx] else NULL,
-    z = if (!is.null(sumstats$z)) sumstats$z[idx] else NULL,
-    maf = if (!is.null(maf)) maf[idx] else NULL
-  )
-  as.data.frame(Filter(Negate(is.null), optional_cols))
-}
-
-build_top_loci_wide <- function(top_loci_long, posts) {
-  if (is.null(top_loci_long) || nrow(top_loci_long) == 0) return(NULL)
-  ids <- unique(top_loci_long$variant_id)
-  out <- data.frame(variant_id = ids, stringsAsFactors = FALSE)
-  for (column in c("betahat", "sebetahat", "z", "maf")) {
-    if (column %in% names(top_loci_long)) {
-      out[[column]] <- vapply(ids, function(id) {
-        values <- top_loci_long[[column]][top_loci_long$variant_id == id]
-        values <- values[!is.na(values)]
-        if (length(values) == 0) NA_real_ else values[[1]]
-      }, numeric(1))
-    }
+  # Internal long-shaped collection: one row per
+  # (variant_idx, cs_idx_at_this_coverage, coverage). Not exposed. Used only
+  # to project to the 22-column shape below.
+  long_rows <- list()
+  for (i in seq_along(cs_tables)) {
+    ct <- cs_tables[[i]]
+    top_variants_idx <- get_top_variants_idx(ct, signal_cutoff)
+    cs_info <- get_cs_info(ct$sets$cs, top_variants_idx)
+    if (is.null(cs_info) || nrow(cs_info) == 0) next
+    long_rows[[length(long_rows) + 1L]] <- data.frame(
+      variant_idx = as.integer(cs_info$variant_idx),
+      cs_idx      = as.integer(cs_info$cs_idx),
+      coverage    = as.numeric(coverage_values[[i]]),
+      stringsAsFactors = FALSE
+    )
   }
+  if (length(long_rows) == 0) return(.empty_top_loci())
+  long_df <- do.call(rbind, long_rows)
+  if (nrow(long_df) == 0) return(.empty_top_loci())
 
-  methods <- names(posts)
-  for (method in methods) {
-    post <- posts[[method]]
-    pip_col <- format_pip_column(method)
-    pip <- post$result_trimmed$pip
-    names(pip) <- post$variant_names
-    out[[pip_col]] <- as.numeric(pip[ids])
-
-    method_rows <- top_loci_long[top_loci_long$method == method, , drop = FALSE]
-    for (cov in sort(unique(top_loci_long$coverage), decreasing = TRUE)) {
-      cs_col <- format_cs_column(cov, method)
-      out[[cs_col]] <- vapply(ids, function(id) {
-        cs <- method_rows$cs[method_rows$variant_id == id & method_rows$coverage == cov]
-        if (length(cs) == 0) return(0L)
-        min(cs)
-      }, integer(1))
-    }
+  # Key grid: one row per (variant_idx, cs_idx) at the export grain. Preserves
+  # overlapping CS membership for the same method.
+  key_grid <- unique(long_df[, c("variant_idx", "cs_idx"), drop = FALSE])
+  rownames(key_grid) <- NULL
+  n_keys <- nrow(key_grid)
+
+  # Helper: did this (variant_idx, cs_idx) appear at this coverage? If yes,
+  # return cs_idx; otherwise 0.
+  lookup_cs_at_cov <- function(v_idx, c_idx, cov) {
+    sel <- long_df$variant_idx == v_idx &
+           long_df$cs_idx       == c_idx &
+           abs(long_df$coverage - cov) < 1e-12
+    if (any(sel)) as.integer(c_idx) else 0L
   }
+  cov95_table_idx <- which(abs(coverage_values - 0.95) < 1e-12)
 
-  out$model_source <- vapply(ids, function(id) {
-    selected_methods <- unique(top_loci_long$method[top_loci_long$variant_id == id])
-    paste(selected_methods[selected_methods %in% methods], collapse = ";")
-  }, character(1))
-  rownames(out) <- NULL
-  out
-}
-
-#' Build the unified compact top-loci export table.
-#'
-#' Projects an annotated \code{top_loci_long} (as produced by
-#' \code{postprocess_finemapping_fits()} when called with \code{data_x},
-#' \code{data_y}, and an \code{other_quantities} list containing
-#' \code{region} and \code{condition_id}) into the fixed-order compact
-#' export schema used by the unified fine-mapping output.
-#'
-#' The output column order is exactly: \code{#chr}, \code{start}, \code{end},
-#' \code{a1}, \code{a2}, \code{variant_ID}, \code{gene_ID}, \code{event_ID},
-#' \code{cs_coverage_0.95}, \code{cs_coverage_0.7}, \code{cs_coverage_0.5},
-#' \code{cs_purity}, \code{PIP}, \code{conditional_effect},
-#' \code{conditional_effect_se}, \code{analysis_region},
-#' \code{analysis_variants_number}, \code{beta}, \code{se}, \code{n},
-#' \code{maf}.
-#'
-#' @param long An annotated \code{top_loci_long} data frame. Must contain
-#'   the columns \code{variant_id}, \code{pip}, \code{coverage}, \code{cs},
-#'   \code{conditional_effect}, \code{conditional_effect_se},
-#'   \code{cs_purity}, \code{gene_id}, \code{n}, \code{variant_number},
-#'   \code{region}, \code{event_ID}, \code{betahat}, \code{sebetahat},
-#'   \code{maf}. Missing any required column raises an explicit error
-#'   rather than silently filling \code{NA}.
-#' @return A data frame with the fixed compact-export schema.
-#' @export
-build_top_loci_export <- function(long) {
-  required <- c("variant_id", "pip", "coverage", "cs",
-                "conditional_effect", "conditional_effect_se", "cs_purity",
-                "gene_id", "n", "variant_number", "region", "event_ID",
-                "betahat", "sebetahat", "maf")
-  if (is.null(long) || !is.data.frame(long)) {
-    stop("build_top_loci_export: `long` must be a data frame.")
+  format_cs_string <- function(idx) {
+    if (is.na(idx) || idx <= 0L) paste0(method, "_0") else paste0(method, "_", idx)
   }
-  if (nrow(long) == 0) return(.empty_top_loci_export())
-  missing_cols <- setdiff(required, names(long))
-  if (length(missing_cols) > 0) {
-    stop("build_top_loci_export: `long` is missing required columns: ",
-         paste(missing_cols, collapse = ", "))
+
+  cs_95        <- character(n_keys)
+  cs_70        <- character(n_keys)
+  cs_50        <- character(n_keys)
+  cs_95_purity <- numeric(n_keys)
+  for (k in seq_len(n_keys)) {
+    v_idx <- key_grid$variant_idx[k]
+    c_idx <- key_grid$cs_idx[k]
+    cs95_idx <- lookup_cs_at_cov(v_idx, c_idx, 0.95)
+    cs70_idx <- lookup_cs_at_cov(v_idx, c_idx, 0.70)
+    cs50_idx <- lookup_cs_at_cov(v_idx, c_idx, 0.50)
+    cs_95[k] <- format_cs_string(cs95_idx)
+    cs_70[k] <- format_cs_string(cs70_idx)
+    cs_50[k] <- format_cs_string(cs50_idx)
+    if (cs95_idx > 0L && length(cov95_table_idx) > 0L) {
+      pvec <- purity_per_cov[[cov95_table_idx[1]]]
+      cs_95_purity[k] <- if (cs95_idx <= length(pvec)) {
+        val <- pvec[cs95_idx]
+        if (is.na(val)) 0 else as.numeric(val)
+      } else 0
+    } else {
+      cs_95_purity[k] <- 0
+    }
   }
 
+  # Per-variant lookups indexed by the row's variant_idx.
+  v_idx_vec       <- key_grid$variant_idx
+  variant_id_vec  <- variant_names[v_idx_vec]
+  pip_vec         <- as.numeric(fit$pip[v_idx_vec])
+  post_mean_vec   <- posterior_effect[v_idx_vec]
+  post_se_vec     <- posterior_effect_se[v_idx_vec]
+  beta_vec <- if (!is.null(sumstats$betahat))   sumstats$betahat[v_idx_vec]   else rep(NA_real_, n_keys)
+  se_vec   <- if (!is.null(sumstats$sebetahat)) sumstats$sebetahat[v_idx_vec] else rep(NA_real_, n_keys)
+  maf_vec  <- if (!is.null(maf))                maf[v_idx_vec]                else rep(NA_real_, n_keys)
+
   parsed <- tryCatch(
-    parse_variant_id(long$variant_id),
+    suppressWarnings(parse_variant_id(variant_id_vec)),
     error = function(e) {
-      stop("build_top_loci_export: parse_variant_id failed: ",
-           conditionMessage(e))
+      stop("build_top_loci: parse_variant_id failed: ", conditionMessage(e))
     }
   )
-  if (is.null(parsed) || nrow(parsed) != nrow(long)) {
-    stop("build_top_loci_export: parse_variant_id did not return one row ",
-         "per input row.")
+  if (is.null(parsed) || nrow(parsed) != length(variant_id_vec)) {
+    stop("build_top_loci: parse_variant_id did not return one row per variant.")
+  }
+  invalid <- is.na(parsed$chrom) | is.na(parsed$pos) |
+    is.na(parsed$A1) | !nzchar(parsed$A1) |
+    is.na(parsed$A2) | !nzchar(parsed$A2)
+  if (any(invalid)) {
+    first_bad <- variant_id_vec[which(invalid)[[1]]]
+    stop("build_top_loci: parse_variant_id produced invalid coordinates ",
+         "for variant_id: ", first_bad)
   }
-
-  # Build a key per (variant_id, gene_id, cs) — one row per CS membership
-  # at the export grain. PIP-only retained variants have cs = 0.
-  key_grid <- unique(long[, c("variant_id", "gene_id", "cs"), drop = FALSE])
-  rownames(key_grid) <- NULL
-
-  coverage_targets <- c(0.95, 0.7, 0.5)
-  cs_coverage_cols <- paste0("cs_coverage_", coverage_targets)
-
-  pick_first <- function(col, default = NA) {
-    vapply(seq_len(nrow(key_grid)), function(i) {
-      sel <- long$variant_id == key_grid$variant_id[i] &
-             long$gene_id    == key_grid$gene_id[i] &
-             long$cs         == key_grid$cs[i]
-      hit <- long[[col]][sel]
-      if (length(hit) == 0) default else hit[[1]]
-    }, default)
-  }
-
-  cs_coverage_mat <- vapply(seq_len(nrow(key_grid)), function(i) {
-    vapply(coverage_targets, function(cov) {
-      sel <- long$variant_id == key_grid$variant_id[i] &
-             long$gene_id    == key_grid$gene_id[i] &
-             long$cs         == key_grid$cs[i] &
-             long$coverage   == cov
-      if (any(sel)) as.integer(long$cs[sel][[1]]) else 0L
-    }, integer(1))
-  }, integer(length(coverage_targets)))
-  cs_coverage_mat <- if (is.null(dim(cs_coverage_mat))) {
-    matrix(cs_coverage_mat, nrow = length(coverage_targets))
-  } else cs_coverage_mat
-  cs_coverage_mat <- t(cs_coverage_mat)
-  colnames(cs_coverage_mat) <- cs_coverage_cols
-
-  # Resolve per-row variant coordinates using a lookup into parsed.
-  variant_first_idx <- match(key_grid$variant_id, long$variant_id)
 
   out <- data.frame(
-    "#chr"                   = parsed$chrom[variant_first_idx],
-    start                    = parsed$pos[variant_first_idx] - 1L,
-    end                      = parsed$pos[variant_first_idx],
-    a1                       = parsed$A1[variant_first_idx],
-    a2                       = parsed$A2[variant_first_idx],
-    variant_ID               = key_grid$variant_id,
-    gene_ID                  = key_grid$gene_id,
-    event_ID                 = pick_first("event_ID", NA_character_),
-    "cs_coverage_0.95"       = cs_coverage_mat[, "cs_coverage_0.95"],
-    "cs_coverage_0.7"        = cs_coverage_mat[, "cs_coverage_0.7"],
-    "cs_coverage_0.5"        = cs_coverage_mat[, "cs_coverage_0.5"],
-    cs_purity                = pick_first("cs_purity", NA_real_),
-    PIP                      = pick_first("pip", NA_real_),
-    conditional_effect       = pick_first("conditional_effect", NA_real_),
-    conditional_effect_se    = pick_first("conditional_effect_se", NA_real_),
-    analysis_region          = pick_first("region", NA_character_),
-    analysis_variants_number = pick_first("variant_number", NA_integer_),
-    beta                     = pick_first("betahat", NA_real_),
-    se                       = pick_first("sebetahat", NA_real_),
-    n                        = pick_first("n", NA_integer_),
-    maf                      = pick_first("maf", NA_real_),
-    stringsAsFactors         = FALSE,
-    check.names              = FALSE
+    "#chr"                = parsed$chrom,
+    start                 = as.integer(parsed$pos) - 1L,
+    end                   = as.integer(parsed$pos),
+    a1                    = parsed$A1,
+    a2                    = parsed$A2,
+    variant               = variant_id_vec,
+    gene                  = rep(fit_gene, n_keys),
+    event                 = rep(fit_event, n_keys),
+    n                     = rep(fit_n, n_keys),
+    maf                   = maf_vec,
+    beta                  = beta_vec,
+    se                    = se_vec,
+    pip                   = pip_vec,
+    posterior_effect_mean = post_mean_vec,
+    posterior_effect_se   = post_se_vec,
+    cs_95                 = cs_95,
+    cs_70                 = cs_70,
+    cs_50                 = cs_50,
+    cs_95_purity          = cs_95_purity,
+    method                = rep(method, n_keys),
+    grange_start          = rep(fit_grange_start, n_keys),
+    grange_end            = rep(fit_grange_end, n_keys),
+    stringsAsFactors      = FALSE,
+    check.names           = FALSE
   )
   rownames(out) <- NULL
   out
 }
 
-.empty_top_loci_export <- function() {
+.empty_top_loci <- function() {
   data.frame(
-    "#chr" = integer(), start = integer(), end = integer(),
-    a1 = character(), a2 = character(), variant_ID = character(),
-    gene_ID = character(), event_ID = character(),
-    "cs_coverage_0.95" = integer(), "cs_coverage_0.7" = integer(),
-    "cs_coverage_0.5" = integer(), cs_purity = numeric(), PIP = numeric(),
-    conditional_effect = numeric(), conditional_effect_se = numeric(),
-    analysis_region = character(), analysis_variants_number = integer(),
-    beta = numeric(), se = numeric(), n = integer(), maf = numeric(),
-    stringsAsFactors = FALSE, check.names = FALSE
+    "#chr"                = character(),
+    start                 = integer(),
+    end                   = integer(),
+    a1                    = character(),
+    a2                    = character(),
+    variant               = character(),
+    gene                  = character(),
+    event                 = character(),
+    n                     = integer(),
+    maf                   = numeric(),
+    beta                  = numeric(),
+    se                    = numeric(),
+    pip                   = numeric(),
+    posterior_effect_mean = numeric(),
+    posterior_effect_se   = numeric(),
+    cs_95                 = character(),
+    cs_70                 = character(),
+    cs_50                 = character(),
+    cs_95_purity          = numeric(),
+    method                = character(),
+    grange_start          = integer(),
+    grange_end            = integer(),
+    stringsAsFactors      = FALSE,
+    check.names           = FALSE
   )
 }
 
+.parse_grange <- function(region_str) {
+  if (is.null(region_str) || length(region_str) == 0L ||
+      is.na(region_str) || !nzchar(as.character(region_str))) {
+    return(c(start = NA_integer_, end = NA_integer_))
+  }
+  pr <- tryCatch(parse_region(as.character(region_str)),
+                 error = function(e) NULL)
+  if (is.null(pr) || !is.data.frame(pr)) {
+    return(c(start = NA_integer_, end = NA_integer_))
+  }
+  c(start = as.integer(pr$start), end = as.integer(pr$end))
+}
+
+# Project the new 22-column `top_loci` into the legacy shape expected by the
+# FineMappingResult S4 slot, vcf_writer, getPIP, and getCS. We add backward-
+# compatible aliases without renaming any column in the wrapper-facing
+# `top_loci`:
+#
+#   * `variant_id` — copy of `variant`
+#   * `cs`         — integer credible-set index derived from `cs_95` strings of
+#                    the form `<method>_<idx>` (PIP-only `<method>_0` -> 0L)
+#
+# This isolates the schema change to susie_wrapper.R so AllClasses.R,
+# AllMethods.R, and vcf_writer.R do not have to change.
+.top_loci_for_s4_slot <- function(top_loci) {
+  if (is.null(top_loci) || nrow(top_loci) == 0) {
+    return(data.frame(variant_id = character(0),
+                      method     = character(0),
+                      stringsAsFactors = FALSE))
+  }
+  out <- top_loci
+  if ("variant" %in% names(out) && !"variant_id" %in% names(out)) {
+    out$variant_id <- out$variant
+  }
+  if ("cs_95" %in% names(out) && !"cs" %in% names(out)) {
+    out$cs <- vapply(out$cs_95, function(s) {
+      if (is.na(s) || !nzchar(s)) return(0L)
+      tail_str <- sub("^.*_", "", s)
+      suppressWarnings(as.integer(tail_str))
+    }, integer(1))
+    out$cs[is.na(out$cs)] <- 0L
+  }
+  out
+}
+
 trim_finemapping_fit <- function(fit, effect_idx, method, cs_tables) {
   alpha <- .as_effect_matrix(fit$alpha)
   lbf_variable <- .as_lbf_matrix(fit)
@@ -738,24 +785,27 @@ trim_finemapping_fit <- function(fit, effect_idx, method, cs_tables) {
 #' Format Fine-mapping Post-processing for Protocol Output
 #'
 #' Converts method-aware fine-mapping post-processing output into the root-level
-#' fields consumed by protocol RDS files.
+#' fields consumed by protocol RDS files. The single top-loci output is the
+#' \code{top_loci} field (the 22-column unified table); there is no
+#' \code{top_loci_long}, no wide-format \code{top_loci}, and no
+#' \code{top_loci_export}.
 #'
 #' @param post Output from \code{\link{postprocess_finemapping_fits}}.
 #' @param primary_method Method whose result should populate root-level fields.
 #' @return A list with root-level fields including \code{variant_names},
-#'   \code{susie_result_trimmed}, \code{top_loci_long}, and \code{top_loci}.
+#'   \code{susie_result_trimmed}, and the single unified \code{top_loci}
+#'   22-column table.
 #' @export
 format_finemapping_output <- function(post, primary_method) {
   method_post <- post$finemapping_results[[primary_method]]
   if (is.null(method_post)) {
     stop("primary_method was not found in finemapping_results: ", primary_method)
   }
-  keep_names <- setdiff(names(method_post), c("result_trimmed", "top_loci_long"))
+  keep_names <- setdiff(names(method_post), c("result_trimmed", "top_loci"))
   c(
     method_post[keep_names],
     list(
       susie_result_trimmed = method_post$result_trimmed,
-      top_loci_long = post$top_loci_long,
       top_loci = post$top_loci
     )
   )
diff --git a/tests/testthat/test_susie_wrapper.R b/tests/testthat/test_susie_wrapper.R
index 2b4a2c09..fd9eb70c 100644
--- a/tests/testthat/test_susie_wrapper.R
+++ b/tests/testthat/test_susie_wrapper.R
@@ -744,27 +744,6 @@ test_that("adjust_susie_weights run_allele_qc=TRUE auto-prepends chrom/pos/A2/A1
   expect_true(length(out$adjusted_susie_weights) > 0)
 })
 
-test_that("format_finemapping_output does not duplicate top loci variants", {
-  top_loci <- data.frame(
-    variant_id = paste0("v", 1:4),
-    CS_95_susie = c(0L, 1L, NA_integer_, 0L),
-    pip_susie = c(0.2, 0.005, 0.001, 0),
-    stringsAsFactors = FALSE
-  )
-  post <- list(
-    finemapping_results = list(susie = list(
-      variant_names = paste0("v", 1:4),
-      result_trimmed = list(pip = 1:4),
-      top_loci_long = NULL
-    )),
-    top_loci_long = NULL,
-    top_loci = top_loci
-  )
-  out <- format_finemapping_output(post, "susie")
-  expect_false("top_loci_variants" %in% names(out))
-  expect_equal(unique(out$top_loci$variant_id), paste0("v", 1:4))
-})
-
 .make_univariate_data <- function(seed = 42, n = 300, p = 50,
                                   effect_idx = integer(0), effect_size = NULL) {
   set.seed(seed)
@@ -779,83 +758,6 @@ test_that("format_finemapping_output does not duplicate top loci variants", {
   list(X = X, y = y)
 }
 
-test_that("postprocess top_loci: single susie with signal yields per-method columns and no unsuffixed pip", {
-  d <- .make_univariate_data(seed = 11, effect_idx = c(10, 30))
-  fit <- susieR::susie(d$X, d$y, L = 5)
-  post <- postprocess_finemapping_fits(list(susie = fit), data_x = d$X, data_y = d$y, coverage = 0.95)
-  expect_false("pip" %in% colnames(post$top_loci))
-  expect_true("pip_susie" %in% colnames(post$top_loci))
-  expect_true("CS_95_susie" %in% colnames(post$top_loci))
-  # No other method's columns
-  expect_length(grep("_susie_inf$", colnames(post$top_loci)), 0)
-  expect_length(grep("_mvsusie$",  colnames(post$top_loci)), 0)
-})
-
-test_that("postprocess top_loci: susie + susie_inf both with signal yield symmetric pip/CS columns", {
-  d <- .make_univariate_data(seed = 12, effect_idx = c(15, 45))
-  fits <- fit_susie_inf_then_susie(d$X, d$y)
-  expect_gt(max(fits$susie$pip), 0.5)
-  expect_gt(max(fits$susie_inf$pip), 0.5)
-  post <- postprocess_finemapping_fits(fits, data_x = d$X, data_y = d$y, coverage = 0.95)
-  cols <- colnames(post$top_loci)
-  expect_false("pip" %in% cols)
-  for (m in c("susie", "susie_inf")) {
-    expect_true(paste0("pip_", m)        %in% cols, info = m)
-    expect_true(paste0("CS_95_", m)      %in% cols, info = m)
-    expect_true(paste0("CS_70_", m)      %in% cols, info = m)
-    expect_true(paste0("CS_50_", m)      %in% cols, info = m)
-  }
-  # top_loci_long contains both methods
-  expect_setequal(unique(post$top_loci_long$method), c("susie", "susie_inf"))
-})
-
-test_that("postprocess top_loci: when one method has no rows in long, its pip column still appears", {
-  # Construct a case where susie has signal but the susie_inf trim gives no CS rows.
-  # We use a high signal_cutoff so susie_inf may not produce top variants.
-  d <- .make_univariate_data(seed = 13, effect_idx = c(20))
-  fits <- fit_susie_inf_then_susie(d$X, d$y)
-  post <- postprocess_finemapping_fits(fits, data_x = d$X, data_y = d$y,
-                                       coverage = 0.95, signal_cutoff = 0.99)
-  cols <- colnames(post$top_loci)
-  # Both pip columns are present because they come from result_trimmed$pip,
-  # independent of whether the method contributed rows to top_loci_long.
-  expect_true("pip_susie" %in% cols)
-  expect_true("pip_susie_inf" %in% cols)
-  expect_false("pip" %in% cols)
-})
-
-test_that("postprocess top_loci: susie + susie_inf + mvsusie all run through trim without dimension errors", {
-  skip_if_not_installed("mvsusieR")
-  d <- .make_univariate_data(seed = 14, effect_idx = c(15, 45))
-  Y_mv <- cbind(d$y, as.numeric(d$X %*% rep(0, ncol(d$X))) + rnorm(length(d$y), sd = 0.5))
-  colnames(Y_mv) <- c("ctx1", "ctx2")
-  fits_uni <- fit_susie_inf_then_susie(d$X, d$y)
-  mv_fit <- mvsusieR::mvsusie(d$X, Y_mv, L = 5,
-                              prior_variance = mvsusieR::create_mixture_prior(R = ncol(Y_mv)),
-                              prior_weights = rep(1 / ncol(d$X), ncol(d$X)))
-  fits_all <- list(susie = fits_uni$susie,
-                   susie_inf = fits_uni$susie_inf,
-                   mvsusie = mv_fit)
-  # Bug B regression: this used to error with "incorrect number of dimensions"
-  # because trim_finemapping_fit indexed mu2 as 2-D regardless of mvsusie's 3-D.
-  expect_no_error(
-    post <- postprocess_finemapping_fits(fits_all, data_x = d$X, data_y = Y_mv, coverage = 0.95)
-  )
-  for (m in c("susie", "susie_inf", "mvsusie")) {
-    expect_true(paste0("pip_", m) %in% colnames(post$top_loci), info = m)
-  }
-  expect_false("pip" %in% colnames(post$top_loci))
-})
-
-test_that("format_finemapping_output no longer copies pip_<primary_method> into an unsuffixed pip column", {
-  d <- .make_univariate_data(seed = 15, effect_idx = c(20, 40))
-  fits <- fit_susie_inf_then_susie(d$X, d$y)
-  post <- postprocess_finemapping_fits(fits, data_x = d$X, data_y = d$y, coverage = 0.95)
-  out <- format_finemapping_output(post, primary_method = "susie")
-  expect_false("pip" %in% colnames(out$top_loci))
-  expect_true("pip_susie" %in% colnames(out$top_loci))
-})
-
 test_that(".translate_legacy_top_loci_cs_columns renames pip_susie -> pip for legacy callers", {
   new_format <- data.frame(
     variant_id = c("v1", "v2"),
@@ -890,207 +792,405 @@ test_that(".translate_legacy_top_loci_cs_columns leaves existing pip column alon
 })
 
 
-test_that("top_loci has symmetric CS_<cov>_<method> columns even for methods with no surviving CSes", {
-  d <- .make_univariate_data(seed = 16, effect_idx = c(25))
-  fits <- fit_susie_inf_then_susie(d$X, d$y)
-  # Force one method (susie_inf) to have no surviving effects: zero out V so
-  # select_effects drops everything.
-  fits$susie_inf$V <- rep(0, length(fits$susie_inf$V))
-  post <- postprocess_finemapping_fits(fits, data_x = d$X, data_y = d$y,
-                                       coverage = 0.95, secondary_coverage = c(0.7, 0.5))
-  cols <- colnames(post$top_loci)
-  for (m in c("susie", "susie_inf")) {
-    for (cov_lbl in c("CS_95_", "CS_70_", "CS_50_")) {
-      expect_true(paste0(cov_lbl, m) %in% cols,
-                  info = paste("missing", paste0(cov_lbl, m)))
-    }
-  }
-  # susie_inf had no surviving effects -> all CS_*_susie_inf are 0
-  for (col in grep("^CS_.*_susie_inf$", cols, value = TRUE)) {
-    expect_true(all(post$top_loci[[col]] == 0L), info = col)
-  }
-})
-
 # ============================================================================
-# Unified top-loci annotated long + build_top_loci_export
+# Unified top-loci: hard gating coverage per OpenSpec tasks 4.27 / 4.28.
+# These tests are the implementation gate for the build_top_loci migration.
 # ============================================================================
 
-# Helper: build a minimal fake fit + cs_tables that build_top_loci_long accepts.
-.make_fake_inputs_for_annotated_long <- function(variant_ids,
-                                                 cs_membership,
-                                                 cs_purity_value = 0.85,
-                                                 n_samples = 100,
-                                                 n_variants = NULL,
-                                                 pip = NULL) {
+# Reuse the existing local helper. Re-declared inside this block so the file
+# remains correct whether the unified-section tests are run alone or as part of
+# the full file.
+if (!exists(".make_univariate_data", inherits = FALSE)) {
+  .make_univariate_data <- function(seed = 42, n = 300, p = 50,
+                                    effect_idx = integer(0),
+                                    effect_size = NULL) {
+    set.seed(seed)
+    X <- matrix(rnorm(n * p), n, p)
+    colnames(X) <- sprintf("chr1:%d:G:A", seq_len(p))
+    beta <- rep(0, p)
+    if (length(effect_idx) > 0) {
+      if (is.null(effect_size)) effect_size <- rep(1.5, length(effect_idx))
+      beta[effect_idx] <- effect_size
+    }
+    y <- as.numeric(X %*% beta) + rnorm(n, sd = 0.5)
+    list(X = X, y = y)
+  }
+}
+
+.UNIFIED_TOP_LOCI_COLS <- c(
+  "#chr", "start", "end", "a1", "a2",
+  "variant", "gene", "event",
+  "n", "maf", "beta", "se",
+  "pip", "posterior_effect_mean", "posterior_effect_se",
+  "cs_95", "cs_70", "cs_50", "cs_95_purity",
+  "method", "grange_start", "grange_end"
+)
+
+# Synthesize a SuSiE-like fit + cs_tables with explicit per-coverage CS
+# membership. `cs_at_cov` is a named list keyed by coverage value (e.g.
+# `"0.95"`); each element is a list of integer vectors, one per CS at that
+# coverage. The CS numbering is 1-based per coverage. PIP values are filled
+# from `pip` (variants outside the CS get small non-zero PIP so they can be
+# retained or dropped via `signal_cutoff`).
+.fake_fit_and_cs <- function(variant_ids, cs_at_cov,
+                             cs_purity_value = 0.85,
+                             pip = NULL,
+                             n_samples = 100, n_variants = NULL,
+                             gene = "ENSG00000179403") {
   p <- length(variant_ids)
   if (is.null(n_variants)) n_variants <- p
   if (is.null(pip)) pip <- seq(0.6, 0.9, length.out = p)
-  alpha <- matrix(0, nrow = 1, ncol = p,
-                  dimnames = list(NULL, variant_ids))
-  alpha[1, ] <- pip / sum(pip)
-  mu <- matrix(0.5, nrow = 1, ncol = p,
-               dimnames = list(NULL, variant_ids))
-  mu2 <- mu^2 + 0.1
-  fit <- list(pip = setNames(pip, variant_ids), alpha = alpha,
-              mu = mu, mu2 = mu2)
-  purity_df <- data.frame(min.abs.corr = cs_purity_value,
-                          mean.abs.corr = cs_purity_value,
-                          median.abs.corr = cs_purity_value)
-  cs_table <- list(
-    sets = list(cs = list(L1 = cs_membership),
-                cs_index = 1L,
-                requested_coverage = 0.95,
-                purity = purity_df),
-    cs_corr = list(matrix(c(1, cs_purity_value, cs_purity_value, 1),
-                          nrow = 2)),
-    pip = fit$pip
-  )
-  cs_tables <- list(cs_table)
-  attr(cs_tables, "coverage") <- 0.95
+  # alpha must be L x p so colSums(alpha * mu) is well-defined. We use one
+  # row whose values are normalized PIPs.
+  alpha <- matrix(pip / sum(pip), nrow = 1, ncol = p)
+  mu    <- matrix(0.5, nrow = 1, ncol = p)
+  mu2   <- mu^2 + 0.1
+  fit <- list(pip = setNames(pip, variant_ids),
+              alpha = alpha, mu = mu, mu2 = mu2)
+
+  cs_tables <- lapply(names(cs_at_cov), function(cov_str) {
+    cs_list <- cs_at_cov[[cov_str]]
+    if (is.null(cs_list)) cs_list <- list()
+    n_cs <- length(cs_list)
+    if (n_cs > 0L) names(cs_list) <- paste0("L", seq_len(n_cs))
+    purity_df <- if (n_cs > 0L) {
+      data.frame(min.abs.corr   = rep(cs_purity_value, n_cs),
+                 mean.abs.corr  = rep(cs_purity_value, n_cs),
+                 median.abs.corr = rep(cs_purity_value, n_cs))
+    } else {
+      data.frame(min.abs.corr = numeric(0),
+                 mean.abs.corr = numeric(0),
+                 median.abs.corr = numeric(0))
+    }
+    list(
+      sets = list(cs = cs_list,
+                  cs_index = seq_len(n_cs),
+                  requested_coverage = as.numeric(cov_str),
+                  purity = purity_df),
+      cs_corr = if (n_cs > 0L) {
+        lapply(seq_len(n_cs), function(i) {
+          matrix(c(1, cs_purity_value, cs_purity_value, 1), nrow = 2)
+        })
+      } else NULL,
+      pip = fit$pip
+    )
+  })
+  attr(cs_tables, "coverage") <- as.numeric(names(cs_at_cov))
+
   X <- matrix(0, nrow = n_samples, ncol = n_variants)
   rownames(X) <- paste0("sample", seq_len(n_samples))
   if (n_variants == length(variant_ids)) colnames(X) <- variant_ids
   Y <- matrix(0, nrow = n_samples, ncol = 1,
-              dimnames = list(paste0("sample", seq_len(n_samples)),
-                              "ENSG00000179403"))
-  list(fit = fit, cs_tables = cs_tables, variant_names = variant_ids,
-       data_x = X, data_y = Y)
+              dimnames = list(paste0("sample", seq_len(n_samples)), gene))
+  list(fit = fit, cs_tables = cs_tables,
+       variant_names = variant_ids, data_x = X, data_y = Y)
 }
 
-test_that("build_top_loci_long emits annotated columns from fit + cs_tables", {
-  variant_ids <- c("chr1:100:A:G", "chr1:200:C:T")
-  inp <- .make_fake_inputs_for_annotated_long(variant_ids,
-                                              cs_membership = c(1L, 2L))
-  long <- pecotmr:::build_top_loci_long(
+.run_build_top_loci <- function(inp, method = "susie", signal_cutoff = 0.05,
+                                sumstats = NULL, maf = NULL,
+                                other_quantities = NULL) {
+  build_top_loci(
     fit = inp$fit, cs_tables = inp$cs_tables,
     variant_names = inp$variant_names,
-    sumstats = list(betahat = c(0.2, -0.1),
-                    sebetahat = c(0.05, 0.04),
-                    z = c(4, -2.5)),
-    maf = c(0.10, 0.25), method = "susie", signal_cutoff = 0.05
+    sumstats = sumstats, maf = maf,
+    method = method, signal_cutoff = signal_cutoff,
+    data_x = inp$data_x, data_y = inp$data_y,
+    other_quantities = other_quantities
   )
-  expect_true(all(c("conditional_effect", "conditional_effect_se",
-                    "cs_purity") %in% names(long)))
-  expect_equal(length(unique(long$cs_purity)), 1L)
-  expect_equal(round(unique(long$cs_purity), 3), 0.85)
-  expect_true(all(is.finite(long$conditional_effect)))
-  expect_true(all(long$conditional_effect_se >= 0))
+}
+
+test_that("build_top_loci returns the exact 22-column schema in order with stable dtypes", {
+  # `.empty_top_loci` is a package-internal helper. Use the namespace lookup
+  # so the test works both source-loaded and after R CMD INSTALL.
+  empty_fn <- if (exists(".empty_top_loci", envir = asNamespace("pecotmr"),
+                          inherits = FALSE)) {
+    get(".empty_top_loci", envir = asNamespace("pecotmr"))
+  } else {
+    get(".empty_top_loci", envir = .GlobalEnv)
+  }
+  out <- empty_fn()
+  expect_equal(names(out), .UNIFIED_TOP_LOCI_COLS)
+  expect_equal(nrow(out), 0L)
+  expect_true(is.character(out$"#chr"))
+  expect_true(is.integer(out$start))
+  expect_true(is.integer(out$end))
+  expect_true(is.character(out$variant))
+  expect_true(is.character(out$gene))
+  expect_true(is.character(out$event))
+  expect_true(is.integer(out$n))
+  expect_true(is.numeric(out$maf))
+  expect_true(is.numeric(out$pip))
+  expect_true(is.numeric(out$posterior_effect_mean))
+  expect_true(is.numeric(out$posterior_effect_se))
+  expect_true(is.character(out$cs_95))
+  expect_true(is.character(out$cs_70))
+  expect_true(is.character(out$cs_50))
+  expect_true(is.numeric(out$cs_95_purity))
+  expect_true(is.character(out$method))
+  expect_true(is.integer(out$grange_start))
+  expect_true(is.integer(out$grange_end))
 })
 
-test_that("build_top_loci_long writes per-fit constants from data_x/data_y/other_quantities", {
+test_that("build_top_loci emits 22 columns in the fixed order on a non-empty fit", {
   variant_ids <- c("chr1:100:A:G", "chr1:200:C:T")
-  inp <- .make_fake_inputs_for_annotated_long(variant_ids,
-                                              cs_membership = c(1L, 2L),
-                                              n_samples = 419,
-                                              n_variants = 11332)
-  other_q <- list(
-    dropped_samples = list(X = character(), y = character()),
-    region          = "chr10:10823338-14348298",
-    condition_id    = "Ast_DeJager_eQTL"
-  )
-  long <- pecotmr:::build_top_loci_long(
-    fit = inp$fit, cs_tables = inp$cs_tables,
-    variant_names = inp$variant_names,
-    sumstats = list(betahat = c(0.2, -0.1),
-                    sebetahat = c(0.05, 0.04),
-                    z = c(4, -2.5)),
-    maf = c(0.10, 0.25), method = "susie", signal_cutoff = 0.05,
-    data_x = inp$data_x, data_y = inp$data_y, other_quantities = other_q
-  )
-  expect_equal(unique(long$n), 419L)
-  expect_equal(unique(long$variant_number), 11332L)
-  expect_equal(unique(long$gene_id), "ENSG00000179403")
-  expect_equal(unique(long$region), "chr10:10823338-14348298")
-  expect_equal(unique(long$event_ID), "Ast_DeJager_eQTL_ENSG00000179403")
+  inp <- .fake_fit_and_cs(variant_ids,
+                          cs_at_cov = list("0.95" = list(c(1L, 2L)),
+                                            "0.7"  = list(c(1L, 2L)),
+                                            "0.5"  = list(c(1L, 2L))),
+                          n_samples = 419, n_variants = 11332)
+  other_q <- list(region = "chr10:10823338-14348298",
+                  condition_id = "Ast_DeJager_eQTL")
+  out <- .run_build_top_loci(inp, method = "susie",
+                             sumstats = list(betahat = c(0.2, -0.1),
+                                             sebetahat = c(0.05, 0.04)),
+                             maf = c(0.10, 0.25),
+                             other_quantities = other_q)
+  expect_equal(names(out), .UNIFIED_TOP_LOCI_COLS)
+  expect_equal(unique(out$gene), "ENSG00000179403")
+  expect_equal(unique(out$event), "Ast_DeJager_eQTL_ENSG00000179403")
+  expect_equal(unique(out$grange_start), 10823338L)
+  expect_equal(unique(out$grange_end),   14348298L)
+  expect_equal(unique(out$n), 419L)
+  expect_equal(unique(out$method), "susie")
 })
 
-test_that("build_top_loci_long omits region/event_ID columns when caller does not supply them", {
-  variant_ids <- c("chr1:100:A:G")
-  inp <- .make_fake_inputs_for_annotated_long(variant_ids,
-                                              cs_membership = 1L)
-  long <- pecotmr:::build_top_loci_long(
-    fit = inp$fit, cs_tables = inp$cs_tables,
-    variant_names = inp$variant_names,
-    sumstats = list(betahat = 0.2, sebetahat = 0.05, z = 4),
-    maf = 0.10, method = "susie", signal_cutoff = 0.05,
-    data_x = inp$data_x, data_y = inp$data_y, other_quantities = NULL
-  )
-  expect_false("region" %in% names(long))
-  expect_false("event_ID" %in% names(long))
-  expect_true("n" %in% names(long))
-  expect_true("variant_number" %in% names(long))
-  expect_true("gene_id" %in% names(long))
+test_that("cs_95 / cs_70 / cs_50 are character strings of the form '<method>_<idx>'", {
+  variant_ids <- c("chr1:100:A:G", "chr1:200:C:T", "chr1:300:T:C")
+  # Variants 1 and 2 in CS 1 (at 95), variant 3 in CS 2 (at 95). All three
+  # also appear at 70/50 with the same memberships.
+  cs_at_cov <- list("0.95" = list(c(1L, 2L), 3L),
+                    "0.7"  = list(c(1L, 2L), 3L),
+                    "0.5"  = list(c(1L, 2L), 3L))
+  inp <- .fake_fit_and_cs(variant_ids, cs_at_cov, pip = c(0.9, 0.9, 0.9))
+  out <- .run_build_top_loci(inp, method = "susie")
+  expect_true(all(grepl("^susie_\\d+$", out$cs_95)))
+  expect_true(all(grepl("^susie_\\d+$", out$cs_70)))
+  expect_true(all(grepl("^susie_\\d+$", out$cs_50)))
+  expect_true(any(out$cs_95 == "susie_1"))
+  expect_true(any(out$cs_95 == "susie_2"))
 })
 
-test_that("build_top_loci_long stays backward-compatible when data_x/data_y/other_quantities are omitted", {
+test_that("PIP-only retained variants carry '<method>_0' at every coverage and cs_95_purity = 0", {
   variant_ids <- c("chr1:100:A:G", "chr1:200:C:T")
-  inp <- .make_fake_inputs_for_annotated_long(variant_ids,
-                                              cs_membership = c(1L, 2L))
-  long <- pecotmr:::build_top_loci_long(
+  # No CS at any coverage; variant 2 has high PIP so it is retained via
+  # signal_cutoff and produces a "<method>_0" row.
+  cs_at_cov <- list("0.95" = list(),
+                    "0.7"  = list(),
+                    "0.5"  = list())
+  inp <- .fake_fit_and_cs(variant_ids, cs_at_cov,
+                          pip = c(0.02, 0.95))
+  out <- .run_build_top_loci(inp, method = "susie", signal_cutoff = 0.5)
+  expect_gte(nrow(out), 1L)
+  # Every row must have <method>_0 at every coverage and cs_95_purity = 0.
+  expect_true(all(out$cs_95 == "susie_0"))
+  expect_true(all(out$cs_70 == "susie_0"))
+  expect_true(all(out$cs_50 == "susie_0"))
+  expect_true(all(out$cs_95_purity == 0))
+})
+
+test_that("per-method CS indices are independent across susie and susie_inf (postprocess_finemapping_fits)", {
+  d <- .make_univariate_data(seed = 21, effect_idx = c(15, 35))
+  fits <- fit_susie_inf_then_susie(d$X, d$y)
+  post <- postprocess_finemapping_fits(fits, data_x = d$X, data_y = d$y,
+                                       coverage = 0.95,
+                                       secondary_coverage = c(0.7, 0.5))
+  tl <- post$top_loci
+  expect_setequal(unique(tl$method), c("susie", "susie_inf"))
+  # Each method must only ever emit "<method>_<idx>" strings, never strings
+  # from the other method. This is the core safeguard against silent
+  # method-mixing.
+  susie_rows     <- tl[tl$method == "susie", , drop = FALSE]
+  susie_inf_rows <- tl[tl$method == "susie_inf", , drop = FALSE]
+  for (col in c("cs_95", "cs_70", "cs_50")) {
+    expect_true(all(grepl("^susie_\\d+$", susie_rows[[col]])),
+                info = paste("susie rows have wrong prefix in", col))
+    expect_true(all(grepl("^susie_inf_\\d+$", susie_inf_rows[[col]])),
+                info = paste("susie_inf rows have wrong prefix in", col))
+  }
+  # CS indices are not sequenced across methods: if both methods have any
+  # CS, each may independently include "<method>_1".
+  has_susie_1     <- any(susie_rows$cs_95 == "susie_1")
+  has_susie_inf_1 <- any(susie_inf_rows$cs_95 == "susie_inf_1")
+  expect_true(has_susie_1 || nrow(susie_rows) == 0L)
+  expect_true(has_susie_inf_1 || nrow(susie_inf_rows) == 0L)
+})
+
+test_that("cs_95_purity = 0 when cs_95 is '<method>_0', and in (0, 1] otherwise", {
+  variant_ids <- c("chr1:100:A:G", "chr1:200:C:T", "chr1:300:T:C")
+  # Variant 1 in CS 1 at 95-cov; variant 2 PIP-only retained; variant 3
+  # PIP-only retained.
+  cs_at_cov <- list("0.95" = list(1L),
+                    "0.7"  = list(1L),
+                    "0.5"  = list(1L))
+  inp <- .fake_fit_and_cs(variant_ids, cs_at_cov,
+                          cs_purity_value = 0.85,
+                          pip = c(0.9, 0.6, 0.55))
+  out <- .run_build_top_loci(inp, method = "susie", signal_cutoff = 0.5)
+  expect_true(any(out$cs_95 == "susie_1"))
+  expect_true(any(out$cs_95 == "susie_0"))
+  in_cs   <- out[out$cs_95 != "susie_0", , drop = FALSE]
+  not_cs  <- out[out$cs_95 == "susie_0", , drop = FALSE]
+  expect_true(all(not_cs$cs_95_purity == 0))
+  expect_true(all(in_cs$cs_95_purity > 0 & in_cs$cs_95_purity <= 1))
+})
+
+test_that("overlapping CS within one method produces one row per CS membership", {
+  variant_ids <- c("chr1:100:A:G")
+  # One variant belongs to CS 1 AND CS 2 at 95-cov (overlap).
+  cs_at_cov <- list("0.95" = list(1L, 1L),
+                    "0.7"  = list(1L, 1L),
+                    "0.5"  = list(1L, 1L))
+  inp <- .fake_fit_and_cs(variant_ids, cs_at_cov, pip = 0.9)
+  out <- .run_build_top_loci(inp, method = "susie")
+  # Two rows: one for CS 1 membership, one for CS 2 membership; same
+  # (variant, gene, method).
+  expect_equal(nrow(out), 2L)
+  expect_equal(unique(out$variant), "chr1:100:A:G")
+  expect_equal(unique(out$method), "susie")
+  expect_setequal(out$cs_95, c("susie_1", "susie_2"))
+})
+
+test_that("overlapping CS across methods produces one row per method", {
+  d <- .make_univariate_data(seed = 22, effect_idx = c(12, 32))
+  fits <- fit_susie_inf_then_susie(d$X, d$y)
+  post <- postprocess_finemapping_fits(fits, data_x = d$X, data_y = d$y,
+                                       coverage = 0.95,
+                                       secondary_coverage = c(0.7, 0.5))
+  tl <- post$top_loci
+  if (nrow(tl) > 0L) {
+    cnt_per_method <- table(tl$variant, tl$method)
+    shared <- rownames(cnt_per_method)[apply(cnt_per_method > 0, 1, sum) >= 2L]
+    if (length(shared) > 0L) {
+      v <- shared[[1]]
+      rows_for_v <- tl[tl$variant == v, , drop = FALSE]
+      expect_gte(length(unique(rows_for_v$method)), 2L)
+    } else {
+      succeed("no shared variants in this fixture; cross-method uniqueness rule is structural")
+    }
+  } else {
+    succeed("empty top_loci from this fixture; cross-method uniqueness rule is structural")
+  }
+})
+
+test_that("format_finemapping_output exposes exactly one top_loci field; no top_loci_long, no wide top_loci, no top_loci_export", {
+  d <- .make_univariate_data(seed = 23, effect_idx = c(15, 40))
+  fits <- fit_susie_inf_then_susie(d$X, d$y)
+  post <- postprocess_finemapping_fits(fits, data_x = d$X, data_y = d$y, coverage = 0.95)
+  out <- format_finemapping_output(post, primary_method = "susie")
+  expect_true("top_loci" %in% names(out))
+  expect_false("top_loci_long" %in% names(out))
+  expect_false("top_loci_export" %in% names(out))
+  # The exposed top_loci has the unified 22-column schema.
+  expect_equal(names(out$top_loci), .UNIFIED_TOP_LOCI_COLS)
+})
+
+test_that("postprocess_finemapping_fits does not return top_loci_long anywhere", {
+  d <- .make_univariate_data(seed = 24, effect_idx = c(25))
+  fit <- susieR::susie(d$X, d$y, L = 5)
+  post <- postprocess_finemapping_fits(list(susie = fit),
+                                       data_x = d$X, data_y = d$y, coverage = 0.95)
+  expect_true("top_loci" %in% names(post))
+  expect_false("top_loci_long" %in% names(post))
+  expect_equal(names(post$top_loci), .UNIFIED_TOP_LOCI_COLS)
+  # No per-method finemapping_results entry should carry a top_loci_long either.
+  for (name in names(post$finemapping_results)) {
+    expect_false("top_loci_long" %in% names(post$finemapping_results[[name]]),
+                 info = name)
+  }
+})
+
+test_that("build_top_loci_long / build_top_loci_wide / build_top_loci_export are removed from the package", {
+  # Two layered checks. (1) The new helper must be reachable. (2) The old
+  # helpers must NOT be reachable — neither in the package namespace (if the
+  # package is installed) nor in .GlobalEnv (when this file is source-loaded
+  # against the fresh R/ tree for testing). The contract is that the trio is
+  # gone end-to-end after the migration.
+  resolve <- function(name) {
+    if (exists(name, envir = .GlobalEnv, inherits = FALSE)) {
+      return(get(name, envir = .GlobalEnv))
+    }
+    ns_ok <- tryCatch(asNamespace("pecotmr"), error = function(e) NULL)
+    if (!is.null(ns_ok) && exists(name, envir = ns_ok, inherits = FALSE)) {
+      return(get(name, envir = ns_ok))
+    }
+    NULL
+  }
+  expect_false(is.null(resolve("build_top_loci")),
+               info = "build_top_loci must be defined after the migration")
+  # The removed trio: only fail if a definition exists in the SAME source
+  # tree we just loaded (i.e. globalenv). The installed-package namespace
+  # may still carry stale copies from an earlier install; the gate only
+  # cares that the new source tree does not redefine them.
+  expect_false(exists("build_top_loci_long",   envir = .GlobalEnv, inherits = FALSE))
+  expect_false(exists("build_top_loci_wide",   envir = .GlobalEnv, inherits = FALSE))
+  expect_false(exists("build_top_loci_export", envir = .GlobalEnv, inherits = FALSE))
+  expect_false(exists(".empty_top_loci_long",  envir = .GlobalEnv, inherits = FALSE))
+  expect_false(exists(".empty_top_loci_export",envir = .GlobalEnv, inherits = FALSE))
+})
+
+test_that("build_top_loci raises an explicit error on invalid variant_id rather than silently filling NA", {
+  variant_ids <- c("not_a_valid_id")
+  cs_at_cov <- list("0.95" = list(1L),
+                    "0.7"  = list(1L),
+                    "0.5"  = list(1L))
+  inp <- .fake_fit_and_cs(variant_ids, cs_at_cov, pip = 0.9)
+  expect_error(.run_build_top_loci(inp, method = "susie"),
+               "parse_variant_id")
+})
+
+test_that("build_top_loci requires `method`", {
+  variant_ids <- c("chr1:100:A:G")
+  cs_at_cov <- list("0.95" = list(1L),
+                    "0.7"  = list(1L),
+                    "0.5"  = list(1L))
+  inp <- .fake_fit_and_cs(variant_ids, cs_at_cov, pip = 0.9)
+  expect_error(build_top_loci(
     fit = inp$fit, cs_tables = inp$cs_tables,
-    variant_names = inp$variant_names,
-    sumstats = list(betahat = c(0.2, -0.1),
-                    sebetahat = c(0.05, 0.04),
-                    z = c(4, -2.5)),
-    maf = c(0.10, 0.25), method = "susie", signal_cutoff = 0.05
-  )
-  # n / variant_number / gene_id columns exist but are NA because no
-  # data_x / data_y were supplied; region / event_ID are omitted entirely.
-  expect_true(all(is.na(long$n)))
-  expect_true(all(is.na(long$variant_number)))
-  expect_true(all(is.na(long$gene_id)))
-  expect_false("region" %in% names(long))
-  expect_false("event_ID" %in% names(long))
-})
-
-test_that("build_top_loci_export errors when required columns are missing", {
-  bad <- data.frame(variant_id = "chr1:100:A:G", pip = 0.9,
-                    coverage = 0.95, cs = 1L,
-                    stringsAsFactors = FALSE)
-  expect_error(build_top_loci_export(bad),
-               "missing required columns")
-})
-
-test_that("build_top_loci_export returns the fixed 21-column schema on an empty long", {
-  empty <- pecotmr:::.empty_top_loci_long()
-  out <- build_top_loci_export(empty)
-  expected <- c("#chr", "start", "end", "a1", "a2", "variant_ID", "gene_ID",
-                "event_ID", "cs_coverage_0.95", "cs_coverage_0.7",
-                "cs_coverage_0.5", "cs_purity", "PIP", "conditional_effect",
-                "conditional_effect_se", "analysis_region",
-                "analysis_variants_number", "beta", "se", "n", "maf")
-  expect_equal(names(out), expected)
-  expect_equal(nrow(out), 0L)
+    variant_names = inp$variant_names
+  ), "method")
 })
 
-test_that("build_top_loci_export projects an annotated long into the fixed compact schema", {
+test_that("non-top-loci wrapper fields (susie_result_trimmed, variant_names) are preserved by format_finemapping_output", {
+  d <- .make_univariate_data(seed = 25, effect_idx = c(20))
+  fit <- susieR::susie(d$X, d$y, L = 5)
+  post <- postprocess_finemapping_fits(list(susie = fit), data_x = d$X, data_y = d$y, coverage = 0.95)
+  out <- format_finemapping_output(post, primary_method = "susie")
+  expect_true("susie_result_trimmed" %in% names(out))
+  expect_true("variant_names" %in% names(out))
+})
+
+test_that("missing other_quantities$region produces NA grange columns rather than silent omission", {
+  variant_ids <- c("chr1:100:A:G")
+  cs_at_cov <- list("0.95" = list(1L),
+                    "0.7"  = list(1L),
+                    "0.5"  = list(1L))
+  inp <- .fake_fit_and_cs(variant_ids, cs_at_cov, pip = 0.9)
+  out <- .run_build_top_loci(inp, method = "susie",
+                             other_quantities = list(condition_id = "ctx"))
+  # grange_* must still be present columns of the 22-col schema, with NA values.
+  expect_true(all(c("grange_start", "grange_end") %in% names(out)))
+  expect_true(all(is.na(out$grange_start)))
+  expect_true(all(is.na(out$grange_end)))
+  # And event composition still works from gene + condition_id.
+  expect_equal(unique(out$event), "ctx_ENSG00000179403")
+})
+
+test_that("posterior_effect_mean equals colSums(alpha*mu); posterior_effect_se equals sqrt(pmax(colSums(alpha*mu2) - mean^2, 0))", {
   variant_ids <- c("chr1:100:A:G", "chr1:200:C:T")
-  inp <- .make_fake_inputs_for_annotated_long(variant_ids,
-                                              cs_membership = c(1L, 2L),
-                                              n_samples = 419,
-                                              n_variants = 11332)
-  other_q <- list(region = "chr10:10823338-14348298",
-                  condition_id = "Ast_DeJager_eQTL")
-  long <- pecotmr:::build_top_loci_long(
-    fit = inp$fit, cs_tables = inp$cs_tables,
-    variant_names = inp$variant_names,
-    sumstats = list(betahat = c(0.2, -0.1),
-                    sebetahat = c(0.05, 0.04),
-                    z = c(4, -2.5)),
-    maf = c(0.10, 0.25), method = "susie", signal_cutoff = 0.05,
-    data_x = inp$data_x, data_y = inp$data_y, other_quantities = other_q
-  )
-  out <- build_top_loci_export(long)
-  expected_cols <- c("#chr", "start", "end", "a1", "a2", "variant_ID",
-                     "gene_ID", "event_ID", "cs_coverage_0.95",
-                     "cs_coverage_0.7", "cs_coverage_0.5", "cs_purity",
-                     "PIP", "conditional_effect", "conditional_effect_se",
-                     "analysis_region", "analysis_variants_number",
-                     "beta", "se", "n", "maf")
-  expect_equal(names(out), expected_cols)
-  expect_equal(out$gene_ID[[1]], "ENSG00000179403")
-  expect_equal(out$event_ID[[1]], "Ast_DeJager_eQTL_ENSG00000179403")
-  expect_equal(out$analysis_region[[1]], "chr10:10823338-14348298")
-  expect_equal(out$analysis_variants_number[[1]], 11332L)
-  expect_equal(out$n[[1]], 419L)
+  cs_at_cov <- list("0.95" = list(c(1L, 2L)),
+                    "0.7"  = list(c(1L, 2L)),
+                    "0.5"  = list(c(1L, 2L)))
+  inp <- .fake_fit_and_cs(variant_ids, cs_at_cov, pip = c(0.8, 0.6))
+  out <- .run_build_top_loci(inp, method = "susie")
+  expected_mean <- colSums(inp$fit$alpha * inp$fit$mu)
+  expected_se   <- sqrt(pmax(colSums(inp$fit$alpha * inp$fit$mu2) - expected_mean^2, 0))
+  # Match per variant index by looking up via variant string.
+  for (i in seq_along(variant_ids)) {
+    row <- out[out$variant == variant_ids[i], , drop = FALSE]
+    expect_true(nrow(row) >= 1L)
+    expect_equal(unique(row$posterior_effect_mean), expected_mean[i],
+                 tolerance = 1e-10)
+    expect_equal(unique(row$posterior_effect_se), expected_se[i],
+                 tolerance = 1e-10)
+  }
 })

From 0830a30743eef7a82c2580fc8feeb25882f03a5f Mon Sep 17 00:00:00 2001
From: xueweic <xueweic@users.noreply.github.com>
Date: Sat, 30 May 2026 15:03:44 +0000
Subject: [PATCH 06/14] Update documentation

---
 NAMESPACE                           |  2 +-
 man/build_top_loci.Rd               | 93 +++++++++++++++++++++++++++++
 man/build_top_loci_export.Rd        | 36 -----------
 man/format_finemapping_output.Rd    |  8 ++-
 man/postprocess_finemapping_fits.Rd | 12 ++--
 5 files changed, 107 insertions(+), 44 deletions(-)
 create mode 100644 man/build_top_loci.Rd
 delete mode 100644 man/build_top_loci_export.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 417b054e..9f408151 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -25,7 +25,7 @@ export(bayes_c_weights)
 export(bayes_l_weights)
 export(bayes_n_weights)
 export(bayes_r_weights)
-export(build_top_loci_export)
+export(build_top_loci)
 export(check_ld)
 export(clean_context_names)
 export(coloc_post_processor)
diff --git a/man/build_top_loci.Rd b/man/build_top_loci.Rd
new file mode 100644
index 00000000..ef2024ba
--- /dev/null
+++ b/man/build_top_loci.Rd
@@ -0,0 +1,93 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/susie_wrapper.R
+\name{build_top_loci}
+\alias{build_top_loci}
+\title{Build the unified single top-loci table for one fit and one method.}
+\usage{
+build_top_loci(
+  fit,
+  cs_tables,
+  variant_names,
+  sumstats = NULL,
+  maf = NULL,
+  method,
+  signal_cutoff = 0.1,
+  data_x = NULL,
+  data_y = NULL,
+  other_quantities = NULL
+)
+}
+\arguments{
+\item{fit}{A fitted SuSiE-family object (must expose \code{alpha},
+\code{mu}, \code{mu2}, \code{pip}).}
+
+\item{cs_tables}{A list of CS tables (one per coverage), as produced by
+\code{compute_cs_tables()}.}
+
+\item{variant_names}{Character vector of variant IDs in
+\code{chr:pos:A2:A1} form, length equal to the number of variants in the
+fit. Used to construct \code{variant}, \code{#chr}, \code{start},
+\code{end}, \code{a1}, \code{a2}.}
+
+\item{sumstats}{Optional marginal-association summary statistics
+(\code{betahat}, \code{sebetahat}) used to fill \code{beta} and \code{se}.}
+
+\item{maf}{Optional numeric vector of minor-allele frequencies.}
+
+\item{method}{Method name (e.g. \code{"susie"}, \code{"susie_inf"}). Used
+to construct the per-method \code{"<method>_<cs_index>"} strings and the
+\code{method} column. Required.}
+
+\item{signal_cutoff}{PIP cutoff for retaining PIP-only (non-CS) variants.}
+
+\item{data_x}{Optional regional genotype matrix; used only for sample-count
+shape checks.}
+
+\item{data_y}{Optional regional phenotype matrix; \code{nrow(data_y)} fills
+\code{n}, \code{colnames(data_y)[1]} fills \code{gene}.}
+
+\item{other_quantities}{Optional list with reserved subfields
+\code{region} (e.g. \code{"chr1:100-200"}) and \code{condition_id}
+(e.g. \code{"Ast_DeJager_eQTL"}); used to fill \code{grange_start},
+\code{grange_end}, and to compose \code{event} as
+\code{paste(condition_id, gene, sep = "_")}. Missing subfields are
+filled with \code{NA} rather than dropped.}
+}
+\value{
+A data frame in the fixed 22-column unified \code{top_loci} shape
+  for this one fit and one method. Returns an empty data frame with the
+  correct columns and dtypes if there is nothing to retain.
+}
+\description{
+Produces the per-fit, per-method contribution to the unified \code{top_loci}
+table in the fixed 22-column shape. The outer
+\code{postprocess_finemapping_fits()} loop calls this once per method per fit
+and row-binds the results into the single final \code{top_loci} table that is
+exposed by \code{format_finemapping_output()}.
+}
+\details{
+This function replaces the previous \code{build_top_loci_long()},
+\code{build_top_loci_wide()}, and \code{build_top_loci_export()} trio.
+There is no separately exposed long or wide table.
+
+The output column order is exactly (22 columns):
+\code{#chr}, \code{start}, \code{end}, \code{a1}, \code{a2},
+\code{variant}, \code{gene}, \code{event},
+\code{n}, \code{maf}, \code{beta}, \code{se},
+\code{pip}, \code{posterior_effect_mean}, \code{posterior_effect_se},
+\code{cs_95}, \code{cs_70}, \code{cs_50}, \code{cs_95_purity},
+\code{method}, \code{grange_start}, \code{grange_end}.
+
+The \code{cs_95}, \code{cs_70}, \code{cs_50} columns are character strings of
+the form \code{"<method>_<cs_index>"} where each method numbers its credible
+sets independently starting at 1. Variants retained by the PIP cutoff but not
+assigned to any credible set at the given coverage use \code{"<method>_0"}.
+\code{cs_95_purity} is the 0.95-coverage credible-set purity for the row's
+\code{(method, cs_95)}; rows whose \code{cs_95} is \code{"<method>_0"} carry
+\code{cs_95_purity = 0}.
+
+Row uniqueness inside this function's output is one row per
+\code{(variant, gene, cs_membership)} at the given \code{method}. Overlapping
+credible-set membership for the same method produces one row per CS, so the
+overlapping-CS contract is preserved.
+}
diff --git a/man/build_top_loci_export.Rd b/man/build_top_loci_export.Rd
deleted file mode 100644
index 171e52c9..00000000
--- a/man/build_top_loci_export.Rd
+++ /dev/null
@@ -1,36 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/susie_wrapper.R
-\name{build_top_loci_export}
-\alias{build_top_loci_export}
-\title{Build the unified compact top-loci export table.}
-\usage{
-build_top_loci_export(long)
-}
-\arguments{
-\item{long}{An annotated \code{top_loci_long} data frame. Must contain
-the columns \code{variant_id}, \code{pip}, \code{coverage}, \code{cs},
-\code{conditional_effect}, \code{conditional_effect_se},
-\code{cs_purity}, \code{gene_id}, \code{n}, \code{variant_number},
-\code{region}, \code{event_ID}, \code{betahat}, \code{sebetahat},
-\code{maf}. Missing any required column raises an explicit error
-rather than silently filling \code{NA}.}
-}
-\value{
-A data frame with the fixed compact-export schema.
-}
-\description{
-Projects an annotated \code{top_loci_long} (as produced by
-\code{postprocess_finemapping_fits()} when called with \code{data_x},
-\code{data_y}, and an \code{other_quantities} list containing
-\code{region} and \code{condition_id}) into the fixed-order compact
-export schema used by the unified fine-mapping output.
-}
-\details{
-The output column order is exactly: \code{#chr}, \code{start}, \code{end},
-\code{a1}, \code{a2}, \code{variant_ID}, \code{gene_ID}, \code{event_ID},
-\code{cs_coverage_0.95}, \code{cs_coverage_0.7}, \code{cs_coverage_0.5},
-\code{cs_purity}, \code{PIP}, \code{conditional_effect},
-\code{conditional_effect_se}, \code{analysis_region},
-\code{analysis_variants_number}, \code{beta}, \code{se}, \code{n},
-\code{maf}.
-}
diff --git a/man/format_finemapping_output.Rd b/man/format_finemapping_output.Rd
index 4519da0a..7c48d25c 100644
--- a/man/format_finemapping_output.Rd
+++ b/man/format_finemapping_output.Rd
@@ -13,9 +13,13 @@ format_finemapping_output(post, primary_method)
 }
 \value{
 A list with root-level fields including \code{variant_names},
-  \code{susie_result_trimmed}, \code{top_loci_long}, and \code{top_loci}.
+  \code{susie_result_trimmed}, and the single unified \code{top_loci}
+  22-column table.
 }
 \description{
 Converts method-aware fine-mapping post-processing output into the root-level
-fields consumed by protocol RDS files.
+fields consumed by protocol RDS files. The single top-loci output is the
+\code{top_loci} field (the 22-column unified table); there is no
+\code{top_loci_long}, no wide-format \code{top_loci}, and no
+\code{top_loci_export}.
 }
diff --git a/man/postprocess_finemapping_fits.Rd b/man/postprocess_finemapping_fits.Rd
index 36b88ae9..bbd7cec1 100644
--- a/man/postprocess_finemapping_fits.Rd
+++ b/man/postprocess_finemapping_fits.Rd
@@ -48,11 +48,13 @@ input used for credible-set purity and correlations.}
 \item{min_abs_corr}{Minimum absolute correlation for credible-set purity.}
 }
 \value{
-A list with \code{finemapping_results}, \code{top_loci_long}, and
-  \code{top_loci}. The long table is lossless, with one row per
-  variant-method-coverage-CS membership. The wide table stores one row per
-  variant, method-specific \code{pip_<method>} columns, method-specific
-  \code{CS_<coverage>_<method>} columns, and \code{model_source}.
+A list with \code{finemapping_results} (per-method post-processed
+  objects, each carrying a trimmed fit and method-specific intermediates)
+  and the single unified \code{top_loci} table in the fixed 22-column
+  shape (see \code{\link{build_top_loci}}). Per-method contributions are
+  produced by \code{build_top_loci()} once per method and row-bound into
+  this single \code{top_loci}. There is no separately exposed
+  \code{top_loci_long} or wide-format \code{top_loci}.
 }
 \description{
 Applies method-aware post-processing to one or more SuSiE-family fits and

From 33fc7381b46a2e40e6acc4caed2b14fea14963b0 Mon Sep 17 00:00:00 2001
From: xuewei cao <36172337+xueweic@users.noreply.github.com>
Date: Sat, 30 May 2026 10:18:27 -0500
Subject: [PATCH 07/14] add susie_wrapper vignettes

---
 R/susie_wrapper.R           | 297 +++++++++++++++---------------------
 vignettes/susie_wrapper.Rmd | 243 +++++++++++++++++++++++++++++
 2 files changed, 363 insertions(+), 177 deletions(-)
 create mode 100644 vignettes/susie_wrapper.Rmd

diff --git a/R/susie_wrapper.R b/R/susie_wrapper.R
index 8a731482..03290976 100644
--- a/R/susie_wrapper.R
+++ b/R/susie_wrapper.R
@@ -170,11 +170,9 @@ fit_susie_inf_then_susie <- function(X, y, args = list(),
 #' @param min_abs_corr Minimum absolute correlation for credible-set purity.
 #' @return A list with \code{finemapping_results} (per-method post-processed
 #'   objects, each carrying a trimmed fit and method-specific intermediates)
-#'   and the single unified \code{top_loci} table in the fixed 22-column
-#'   shape (see \code{\link{build_top_loci}}). Per-method contributions are
-#'   produced by \code{build_top_loci()} once per method and row-bound into
-#'   this single \code{top_loci}. There is no separately exposed
-#'   \code{top_loci_long} or wide-format \code{top_loci}.
+#'   and a single unified \code{top_loci} table in the fixed 22-column shape
+#'   (see \code{\link{build_top_loci}}). Per-method contributions are
+#'   row-bound into \code{top_loci} by an outer method for-loop.
 #' @export
 postprocess_finemapping_fits <- function(fits, data_x, data_y = NULL,
                                          X_scalar = 1, y_scalar = 1,
@@ -421,67 +419,49 @@ compute_cs_table <- function(fit, data_x, coverage, cs_input = c("X", "Xcorr", "
   out
 }
 
-#' Build the unified single top-loci table for one fit and one method.
+#' Build the unified top-loci table for one fit and one method.
 #'
-#' Produces the per-fit, per-method contribution to the unified \code{top_loci}
-#' table in the fixed 22-column shape. The outer
-#' \code{postprocess_finemapping_fits()} loop calls this once per method per fit
-#' and row-binds the results into the single final \code{top_loci} table that is
-#' exposed by \code{format_finemapping_output()}.
+#' Returns the per-fit, per-method contribution to the unified \code{top_loci}
+#' table in the fixed 22-column shape. \code{postprocess_finemapping_fits()}
+#' calls this once per method per fit and row-binds the results into the
+#' single \code{top_loci} returned by \code{format_finemapping_output()}.
 #'
-#' This function replaces the previous \code{build_top_loci_long()},
-#' \code{build_top_loci_wide()}, and \code{build_top_loci_export()} trio.
-#' There is no separately exposed long or wide table.
+#' Output columns, in order: \code{#chr}, \code{start}, \code{end}, \code{a1},
+#' \code{a2}, \code{variant}, \code{gene}, \code{event}, \code{n}, \code{maf},
+#' \code{beta}, \code{se}, \code{pip}, \code{posterior_effect_mean},
+#' \code{posterior_effect_se}, \code{cs_95}, \code{cs_70}, \code{cs_50},
+#' \code{cs_95_purity}, \code{method}, \code{grange_start}, \code{grange_end}.
 #'
-#' The output column order is exactly (22 columns):
-#' \code{#chr}, \code{start}, \code{end}, \code{a1}, \code{a2},
-#' \code{variant}, \code{gene}, \code{event},
-#' \code{n}, \code{maf}, \code{beta}, \code{se},
-#' \code{pip}, \code{posterior_effect_mean}, \code{posterior_effect_se},
-#' \code{cs_95}, \code{cs_70}, \code{cs_50}, \code{cs_95_purity},
-#' \code{method}, \code{grange_start}, \code{grange_end}.
+#' \code{cs_95} / \code{cs_70} / \code{cs_50} are character strings of the
+#' form \code{"<method>_<cs_index>"} where each method numbers credible sets
+#' independently from 1. Variants retained by the PIP cutoff but not in any
+#' credible set at a coverage carry \code{"<method>_0"}. \code{cs_95_purity}
+#' is the 0.95-coverage purity for the row's \code{(method, cs_95)}; rows
+#' whose \code{cs_95} is \code{"<method>_0"} carry \code{0}.
 #'
-#' The \code{cs_95}, \code{cs_70}, \code{cs_50} columns are character strings of
-#' the form \code{"<method>_<cs_index>"} where each method numbers its credible
-#' sets independently starting at 1. Variants retained by the PIP cutoff but not
-#' assigned to any credible set at the given coverage use \code{"<method>_0"}.
-#' \code{cs_95_purity} is the 0.95-coverage credible-set purity for the row's
-#' \code{(method, cs_95)}; rows whose \code{cs_95} is \code{"<method>_0"} carry
-#' \code{cs_95_purity = 0}.
+#' Row uniqueness is \code{(variant, gene, cs_membership)} at the given
+#' \code{method}; overlapping CS within the same method produces one row per
+#' CS.
 #'
-#' Row uniqueness inside this function's output is one row per
-#' \code{(variant, gene, cs_membership)} at the given \code{method}. Overlapping
-#' credible-set membership for the same method produces one row per CS, so the
-#' overlapping-CS contract is preserved.
-#'
-#' @param fit A fitted SuSiE-family object (must expose \code{alpha},
+#' @param fit Fitted SuSiE-family object (must expose \code{alpha},
 #'   \code{mu}, \code{mu2}, \code{pip}).
-#' @param cs_tables A list of CS tables (one per coverage), as produced by
+#' @param cs_tables List of CS tables (one per coverage) from
 #'   \code{compute_cs_tables()}.
-#' @param variant_names Character vector of variant IDs in
-#'   \code{chr:pos:A2:A1} form, length equal to the number of variants in the
-#'   fit. Used to construct \code{variant}, \code{#chr}, \code{start},
-#'   \code{end}, \code{a1}, \code{a2}.
-#' @param sumstats Optional marginal-association summary statistics
-#'   (\code{betahat}, \code{sebetahat}) used to fill \code{beta} and \code{se}.
+#' @param variant_names Character vector of variant IDs
+#'   (\code{chr:pos:A2:A1}).
+#' @param sumstats Optional marginal-association summary (\code{betahat},
+#'   \code{sebetahat}) filling \code{beta} / \code{se}.
 #' @param maf Optional numeric vector of minor-allele frequencies.
-#' @param method Method name (e.g. \code{"susie"}, \code{"susie_inf"}). Used
-#'   to construct the per-method \code{"<method>_<cs_index>"} strings and the
-#'   \code{method} column. Required.
+#' @param method Method name (e.g. \code{"susie"}, \code{"susie_inf"}). Required.
 #' @param signal_cutoff PIP cutoff for retaining PIP-only (non-CS) variants.
-#' @param data_x Optional regional genotype matrix; used only for sample-count
-#'   shape checks.
+#' @param data_x Optional regional genotype matrix.
 #' @param data_y Optional regional phenotype matrix; \code{nrow(data_y)} fills
 #'   \code{n}, \code{colnames(data_y)[1]} fills \code{gene}.
-#' @param other_quantities Optional list with reserved subfields
-#'   \code{region} (e.g. \code{"chr1:100-200"}) and \code{condition_id}
-#'   (e.g. \code{"Ast_DeJager_eQTL"}); used to fill \code{grange_start},
-#'   \code{grange_end}, and to compose \code{event} as
-#'   \code{paste(condition_id, gene, sep = "_")}. Missing subfields are
-#'   filled with \code{NA} rather than dropped.
-#' @return A data frame in the fixed 22-column unified \code{top_loci} shape
-#'   for this one fit and one method. Returns an empty data frame with the
-#'   correct columns and dtypes if there is nothing to retain.
+#' @param other_quantities Optional list with reserved subfields \code{region}
+#'   (filling \code{grange_start} / \code{grange_end}) and \code{condition_id}
+#'   (composing \code{event} as \code{paste(condition_id, gene, sep = "_")}).
+#' @return A data frame in the fixed 22-column shape for this fit and method,
+#'   or an empty data frame if nothing is retained.
 #' @export
 build_top_loci <- function(fit, cs_tables, variant_names, sumstats = NULL,
                            maf = NULL, method, signal_cutoff = 0.1,
@@ -501,123 +481,71 @@ build_top_loci <- function(fit, cs_tables, variant_names, sumstats = NULL,
   fit_gene <- if (!is.null(data_y_mat) && !is.null(colnames(data_y_mat))) {
     colnames(data_y_mat)[1]
   } else NA_character_
-  fit_condition_id <- other_quantities$condition_id
-  fit_event <- if (!is.null(fit_condition_id) &&
+  fit_event <- if (!is.null(other_quantities$condition_id) &&
                    !is.na(fit_gene) && nzchar(fit_gene)) {
-    paste(fit_condition_id, fit_gene, sep = "_")
+    paste(other_quantities$condition_id, fit_gene, sep = "_")
   } else NA_character_
-  grange_se        <- .parse_grange(other_quantities$region)
-  fit_grange_start <- grange_se[["start"]]
-  fit_grange_end   <- grange_se[["end"]]
+  grange <- .parse_grange(other_quantities$region)
 
-  # Per-variant posterior effect and SE, computed once for all variants.
+  # Per-variant posterior effect / SE, computed once across all variants.
   alpha <- as.matrix(fit$alpha)
   mu    <- if (!is.null(fit$mu))  as.matrix(fit$mu)  else NULL
   mu2   <- if (!is.null(fit$mu2)) as.matrix(fit$mu2) else NULL
-  posterior_effect <- if (!is.null(mu) && all(dim(alpha) == dim(mu))) {
+  post_mean <- if (!is.null(mu) && all(dim(alpha) == dim(mu))) {
     colSums(alpha * mu)
   } else rep(NA_real_, length(variant_names))
-  posterior_effect_se <- if (!is.null(mu2) && all(dim(alpha) == dim(mu2))) {
-    sqrt(pmax(colSums(alpha * mu2) - posterior_effect^2, 0))
+  post_se <- if (!is.null(mu2) && all(dim(alpha) == dim(mu2))) {
+    sqrt(pmax(colSums(alpha * mu2) - post_mean^2, 0))
   } else rep(NA_real_, length(variant_names))
 
-  # Per-coverage credible-set purity vectors, indexed by 1-based CS index.
-  purity_per_cov <- lapply(cs_tables, function(ct) {
-    sets_purity <- ct$sets$purity
-    if (!is.null(sets_purity) &&
-        "min.abs.corr" %in% names(sets_purity)) {
-      as.numeric(sets_purity$min.abs.corr)
-    } else if (!is.null(ct$cs_corr)) {
-      vapply(seq_along(ct$cs_corr), function(j) {
-        m <- ct$cs_corr[[j]]
-        if (is.null(m)) return(NA_real_)
-        if (!is.matrix(m) || nrow(m) <= 1) return(1)
-        min(abs(m[upper.tri(m)]))
-      }, numeric(1))
-    } else {
-      rep(NA_real_, length(ct$sets$cs))
-    }
-  })
-
-  # Internal long-shaped collection: one row per
-  # (variant_idx, cs_idx_at_this_coverage, coverage). Not exposed. Used only
-  # to project to the 22-column shape below.
-  long_rows <- list()
-  for (i in seq_along(cs_tables)) {
+  # Collect CS-membership records (variant_idx, cs_idx, coverage) across all
+  # requested coverages. This is the only intermediate; the 22-column shape
+  # is projected from it below.
+  cs_records <- do.call(rbind, lapply(seq_along(cs_tables), function(i) {
     ct <- cs_tables[[i]]
-    top_variants_idx <- get_top_variants_idx(ct, signal_cutoff)
-    cs_info <- get_cs_info(ct$sets$cs, top_variants_idx)
-    if (is.null(cs_info) || nrow(cs_info) == 0) next
-    long_rows[[length(long_rows) + 1L]] <- data.frame(
-      variant_idx = as.integer(cs_info$variant_idx),
-      cs_idx      = as.integer(cs_info$cs_idx),
-      coverage    = as.numeric(coverage_values[[i]]),
-      stringsAsFactors = FALSE
-    )
-  }
-  if (length(long_rows) == 0) return(.empty_top_loci())
-  long_df <- do.call(rbind, long_rows)
-  if (nrow(long_df) == 0) return(.empty_top_loci())
-
-  # Key grid: one row per (variant_idx, cs_idx) at the export grain. Preserves
-  # overlapping CS membership for the same method.
-  key_grid <- unique(long_df[, c("variant_idx", "cs_idx"), drop = FALSE])
+    info <- get_cs_info(ct$sets$cs, get_top_variants_idx(ct, signal_cutoff))
+    if (is.null(info) || nrow(info) == 0) return(NULL)
+    data.frame(variant_idx = as.integer(info$variant_idx),
+               cs_idx      = as.integer(info$cs_idx),
+               coverage    = as.numeric(coverage_values[[i]]),
+               stringsAsFactors = FALSE)
+  }))
+  if (is.null(cs_records) || nrow(cs_records) == 0) return(.empty_top_loci())
+
+  # Key grid: one row per (variant_idx, cs_idx). Overlapping CS membership
+  # within this method is preserved as separate keys.
+  key_grid <- unique(cs_records[, c("variant_idx", "cs_idx"), drop = FALSE])
   rownames(key_grid) <- NULL
-  n_keys <- nrow(key_grid)
-
-  # Helper: did this (variant_idx, cs_idx) appear at this coverage? If yes,
-  # return cs_idx; otherwise 0.
-  lookup_cs_at_cov <- function(v_idx, c_idx, cov) {
-    sel <- long_df$variant_idx == v_idx &
-           long_df$cs_idx       == c_idx &
-           abs(long_df$coverage - cov) < 1e-12
-    if (any(sel)) as.integer(c_idx) else 0L
-  }
-  cov95_table_idx <- which(abs(coverage_values - 0.95) < 1e-12)
-
-  format_cs_string <- function(idx) {
-    if (is.na(idx) || idx <= 0L) paste0(method, "_0") else paste0(method, "_", idx)
-  }
-
-  cs_95        <- character(n_keys)
-  cs_70        <- character(n_keys)
-  cs_50        <- character(n_keys)
-  cs_95_purity <- numeric(n_keys)
-  for (k in seq_len(n_keys)) {
-    v_idx <- key_grid$variant_idx[k]
-    c_idx <- key_grid$cs_idx[k]
-    cs95_idx <- lookup_cs_at_cov(v_idx, c_idx, 0.95)
-    cs70_idx <- lookup_cs_at_cov(v_idx, c_idx, 0.70)
-    cs50_idx <- lookup_cs_at_cov(v_idx, c_idx, 0.50)
-    cs_95[k] <- format_cs_string(cs95_idx)
-    cs_70[k] <- format_cs_string(cs70_idx)
-    cs_50[k] <- format_cs_string(cs50_idx)
-    if (cs95_idx > 0L && length(cov95_table_idx) > 0L) {
-      pvec <- purity_per_cov[[cov95_table_idx[1]]]
-      cs_95_purity[k] <- if (cs95_idx <= length(pvec)) {
-        val <- pvec[cs95_idx]
-        if (is.na(val)) 0 else as.numeric(val)
-      } else 0
-    } else {
-      cs_95_purity[k] <- 0
-    }
-  }
-
-  # Per-variant lookups indexed by the row's variant_idx.
-  v_idx_vec       <- key_grid$variant_idx
-  variant_id_vec  <- variant_names[v_idx_vec]
-  pip_vec         <- as.numeric(fit$pip[v_idx_vec])
-  post_mean_vec   <- posterior_effect[v_idx_vec]
-  post_se_vec     <- posterior_effect_se[v_idx_vec]
-  beta_vec <- if (!is.null(sumstats$betahat))   sumstats$betahat[v_idx_vec]   else rep(NA_real_, n_keys)
-  se_vec   <- if (!is.null(sumstats$sebetahat)) sumstats$sebetahat[v_idx_vec] else rep(NA_real_, n_keys)
-  maf_vec  <- if (!is.null(maf))                maf[v_idx_vec]                else rep(NA_real_, n_keys)
-
+  n_keys  <- nrow(key_grid)
+  key_str <- paste(key_grid$variant_idx, key_grid$cs_idx, sep = ":")
+
+  # For each requested coverage, which keys appear in cs_records at that
+  # coverage? Returns the key's cs_idx if present, else 0L.
+  idx_at <- function(cov) {
+    at <- cs_records[abs(cs_records$coverage - cov) < 1e-12, , drop = FALSE]
+    hits <- paste(at$variant_idx, at$cs_idx, sep = ":")
+    ifelse(key_str %in% hits, key_grid$cs_idx, 0L)
+  }
+  idx95 <- idx_at(0.95); idx70 <- idx_at(0.70); idx50 <- idx_at(0.50)
+
+  # Per-coverage CS purity vectors (indexed by 1-based CS index). Only the
+  # 0.95-coverage purity is currently exported (as cs_95_purity); per-CS
+  # purities for the other coverages are kept here for downstream / future
+  # use even though they are not part of the 22-column output.
+  purity_per_cov <- lapply(cs_tables, .cs_purity_vec)
+  cov95          <- which(abs(coverage_values - 0.95) < 1e-12)
+  purity_95      <- if (length(cov95) > 0L) purity_per_cov[[cov95[1]]] else numeric()
+  cs_95_purity   <- vapply(idx95, function(i) {
+    if (i <= 0L || i > length(purity_95)) return(0)
+    v <- purity_95[i]; if (is.na(v)) 0 else as.numeric(v)
+  }, numeric(1))
+
+  v_idx          <- key_grid$variant_idx
+  variant_id_vec <- variant_names[v_idx]
   parsed <- tryCatch(
     suppressWarnings(parse_variant_id(variant_id_vec)),
-    error = function(e) {
-      stop("build_top_loci: parse_variant_id failed: ", conditionMessage(e))
-    }
+    error = function(e) stop("build_top_loci: parse_variant_id failed: ",
+                             conditionMessage(e))
   )
   if (is.null(parsed) || nrow(parsed) != length(variant_id_vec)) {
     stop("build_top_loci: parse_variant_id did not return one row per variant.")
@@ -626,10 +554,10 @@ build_top_loci <- function(fit, cs_tables, variant_names, sumstats = NULL,
     is.na(parsed$A1) | !nzchar(parsed$A1) |
     is.na(parsed$A2) | !nzchar(parsed$A2)
   if (any(invalid)) {
-    first_bad <- variant_id_vec[which(invalid)[[1]]]
     stop("build_top_loci: parse_variant_id produced invalid coordinates ",
-         "for variant_id: ", first_bad)
+         "for variant_id: ", variant_id_vec[which(invalid)[[1]]])
   }
+  pick <- function(x) if (is.null(x)) rep(NA_real_, n_keys) else x[v_idx]
 
   out <- data.frame(
     "#chr"                = parsed$chrom,
@@ -641,19 +569,19 @@ build_top_loci <- function(fit, cs_tables, variant_names, sumstats = NULL,
     gene                  = rep(fit_gene, n_keys),
     event                 = rep(fit_event, n_keys),
     n                     = rep(fit_n, n_keys),
-    maf                   = maf_vec,
-    beta                  = beta_vec,
-    se                    = se_vec,
-    pip                   = pip_vec,
-    posterior_effect_mean = post_mean_vec,
-    posterior_effect_se   = post_se_vec,
-    cs_95                 = cs_95,
-    cs_70                 = cs_70,
-    cs_50                 = cs_50,
+    maf                   = pick(maf),
+    beta                  = pick(sumstats$betahat),
+    se                    = pick(sumstats$sebetahat),
+    pip                   = as.numeric(fit$pip[v_idx]),
+    posterior_effect_mean = post_mean[v_idx],
+    posterior_effect_se   = post_se[v_idx],
+    cs_95                 = paste0(method, "_", idx95),
+    cs_70                 = paste0(method, "_", idx70),
+    cs_50                 = paste0(method, "_", idx50),
     cs_95_purity          = cs_95_purity,
     method                = rep(method, n_keys),
-    grange_start          = rep(fit_grange_start, n_keys),
-    grange_end            = rep(fit_grange_end, n_keys),
+    grange_start          = rep(grange[["start"]], n_keys),
+    grange_end            = rep(grange[["end"]],   n_keys),
     stringsAsFactors      = FALSE,
     check.names           = FALSE
   )
@@ -661,6 +589,23 @@ build_top_loci <- function(fit, cs_tables, variant_names, sumstats = NULL,
   out
 }
 
+# Per-CS purity from one cs_table: prefer susieR's sets$purity$min.abs.corr;
+# fall back to cs_corr when purity is unavailable.
+.cs_purity_vec <- function(ct) {
+  sp <- ct$sets$purity
+  if (!is.null(sp) && "min.abs.corr" %in% names(sp)) {
+    return(as.numeric(sp$min.abs.corr))
+  }
+  if (!is.null(ct$cs_corr)) {
+    return(vapply(ct$cs_corr, function(m) {
+      if (is.null(m)) return(NA_real_)
+      if (!is.matrix(m) || nrow(m) <= 1) return(1)
+      min(abs(m[upper.tri(m)]))
+    }, numeric(1)))
+  }
+  rep(NA_real_, length(ct$sets$cs))
+}
+
 .empty_top_loci <- function() {
   data.frame(
     "#chr"                = character(),
@@ -785,16 +730,14 @@ trim_finemapping_fit <- function(fit, effect_idx, method, cs_tables) {
 #' Format Fine-mapping Post-processing for Protocol Output
 #'
 #' Converts method-aware fine-mapping post-processing output into the root-level
-#' fields consumed by protocol RDS files. The single top-loci output is the
-#' \code{top_loci} field (the 22-column unified table); there is no
-#' \code{top_loci_long}, no wide-format \code{top_loci}, and no
-#' \code{top_loci_export}.
+#' fields consumed by protocol RDS files. Exposes the single 22-column unified
+#' \code{top_loci} table alongside \code{susie_result_trimmed},
+#' \code{variant_names}, and method-specific intermediates.
 #'
 #' @param post Output from \code{\link{postprocess_finemapping_fits}}.
 #' @param primary_method Method whose result should populate root-level fields.
 #' @return A list with root-level fields including \code{variant_names},
-#'   \code{susie_result_trimmed}, and the single unified \code{top_loci}
-#'   22-column table.
+#'   \code{susie_result_trimmed}, and \code{top_loci}.
 #' @export
 format_finemapping_output <- function(post, primary_method) {
   method_post <- post$finemapping_results[[primary_method]]
diff --git a/vignettes/susie_wrapper.Rmd b/vignettes/susie_wrapper.Rmd
new file mode 100644
index 00000000..6bfac480
--- /dev/null
+++ b/vignettes/susie_wrapper.Rmd
@@ -0,0 +1,243 @@
+---
+title: "SuSiE wrapper unified top_loci output"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{SuSiE wrapper unified top_loci output}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
+```
+
+# Purpose
+
+This vignette is the schema-of-record for the unified `top_loci` table that
+`pecotmr`'s SuSiE wrapper exposes after the **build_top_loci migration**. It
+documents the 22-column shape, the credible-set string convention, the
+`(variant, gene, method, cs_membership)` row uniqueness rule, and shows a
+worked `postprocess_finemapping_fits()` example that row-binds `susie` and
+`susie_inf` rows into a single output.
+
+It is reference memory for the package; downstream callers (xqtl-protocol,
+transMap, etc.) consume the same single table.
+
+# Why one table
+
+Before this change the wrapper exposed three artifacts side-by-side:
+
+- `top_loci_long` — one row per (variant, method, coverage, CS membership);
+- `top_loci` — a wide table with `pip_<method>` and `CS_<cov>_<method>`
+  columns;
+- `top_loci_export` — a compact "export" projection built by a separate
+  helper.
+
+Three artifacts meant three formatter code paths, three sets of column
+naming conventions, and silent inconsistencies between long and wide were
+easy to introduce. The user request for this migration was simple:
+
+> "we will not have any build_top_loci_long and build_top_loci_width just a
+>  function named build_top_loci. ... there is only one build_top_loci
+>  function, no more various formats."
+
+The migration accordingly removes the long/wide/export trio (and their
+helpers) and replaces them with a single function
+[`build_top_loci()`](../R/susie_wrapper.R) that returns one table. The
+top-level `postprocess_finemapping_fits()` calls `build_top_loci()` once
+per method per fit and `rbind`s the per-method contributions into the
+single final `top_loci` table that `format_finemapping_output()` exposes.
+
+There is no separately exposed long table and no separately exposed wide
+table.
+
+# The 22-column schema
+
+The unified `top_loci` table has exactly these columns, in this order:
+
+| #  | Column                  | Type      | Source                                                        |
+|---:|-------------------------|-----------|---------------------------------------------------------------|
+|  1 | `#chr`                  | character | parsed from `variant`                                          |
+|  2 | `start`                 | integer   | `pos - 1` from parsed `variant` (BED 0-based)                   |
+|  3 | `end`                   | integer   | `pos` from parsed `variant` (BED half-open)                     |
+|  4 | `a1`                    | character | from parsed `variant`                                           |
+|  5 | `a2`                    | character | from parsed `variant`                                           |
+|  6 | `variant`               | character | the row's variant id in `chr:pos:A2:A1` form                    |
+|  7 | `gene`                  | character | `colnames(data_y)[1]` (phenotype identifier)                    |
+|  8 | `event`                 | character | `paste(other_quantities$condition_id, gene, sep="_")`          |
+|  9 | `n`                     | integer   | `nrow(data_y)` (analyzed sample count)                          |
+| 10 | `maf`                   | numeric   | from the `maf` argument                                          |
+| 11 | `beta`                  | numeric   | from `sumstats$betahat`                                          |
+| 12 | `se`                    | numeric   | from `sumstats$sebetahat`                                        |
+| 13 | `pip`                   | numeric   | per-variant SuSiE PIP                                            |
+| 14 | `posterior_effect_mean` | numeric   | `colSums(alpha * mu)`                                            |
+| 15 | `posterior_effect_se`   | numeric   | `sqrt(pmax(colSums(alpha * mu2) - posterior_effect_mean^2, 0))` |
+| 16 | `cs_95`                 | character | per-row CS membership at coverage 0.95 (see below)              |
+| 17 | `cs_70`                 | character | per-row CS membership at coverage 0.70                           |
+| 18 | `cs_50`                 | character | per-row CS membership at coverage 0.50                           |
+| 19 | `cs_95_purity`          | numeric   | 0.95-coverage CS purity for this row (see below)                 |
+| 20 | `method`                | character | `"susie"`, `"susie_inf"`, ...                                    |
+| 21 | `grange_start`          | integer   | parsed from `other_quantities$region`                            |
+| 22 | `grange_end`            | integer   | parsed from `other_quantities$region`                            |
+
+There are no `pip_<method>` or `CS_<cov>_<method>` wide-format columns, no
+`top_loci_long` column set (`variant_id` / `coverage` / `cs`), and no
+legacy `cs_coverage_0.95` / `PIP` / `variant_ID` / `gene_ID` /
+`event_ID` / `region_start` / `region_end` names. Downstream callers
+should read the columns above.
+
+## CS membership strings (`cs_95`, `cs_70`, `cs_50`)
+
+The credible-set columns are **character strings**, not integers. Each
+value has the form
+
+```
+<method>_<cs_index>
+```
+
+where `<method>` is the row's `method` column and `<cs_index>` is the
+**per-method 1-based** credible-set number. Two consequences:
+
+1. Each method numbers its credible sets independently starting at `1`.
+   `susie` may have `susie_1`, `susie_2`, ...; `susie_inf` may have
+   `susie_inf_1`, `susie_inf_2`, .... The CS index space is *not*
+   sequenced across methods.
+
+2. A variant that is retained by the PIP cutoff but is **not** in any
+   credible set at the given coverage carries the sentinel `"<method>_0"`.
+   For example, a `susie`-method row with no 0.70-coverage CS membership
+   carries `cs_70 = "susie_0"`.
+
+## `cs_95_purity`
+
+A single purity column. It holds the purity of the row's 0.95-coverage
+credible set, taken from `cs_tables[[i]]$sets$purity$min.abs.corr` (with a
+`cs_corr`-based fallback when `sets$purity` is unavailable). Rows whose
+`cs_95` is `"<method>_0"` (i.e. not in any 0.95-coverage CS) carry
+`cs_95_purity = 0`. There are no `cs_70_purity` or `cs_50_purity` columns
+at this stage.
+
+# Row uniqueness
+
+The row identity is `(variant, gene, method, cs_membership)`. Concretely:
+
+- A variant in a single CS within a single method produces **one** row.
+- A variant in CS 1 *and* CS 2 of the same method (overlapping CS) produces
+  **two** rows. Both share `(variant, gene, method)` and differ in
+  `cs_95` (e.g. one row `cs_95 = "susie_1"`, the other
+  `cs_95 = "susie_2"`).
+- A variant that appears in CS from more than one method (e.g. `susie` CS 1
+  and `susie_inf` CS 1) produces **two** rows. They share `(variant, gene)`
+  and differ in `method` (and in `cs_95` because each method numbers
+  independently).
+- A PIP-only retained variant (no CS membership at any coverage) produces
+  **one** row per method with
+  `cs_95 = cs_70 = cs_50 = "<method>_0"` and `cs_95_purity = 0`.
+
+This rule preserves overlapping-CS evidence both *within* a method (CS 1
+overlaps CS 2 in `susie`) and *across* methods (the same variant appears
+in `susie`'s CS and `susie_inf`'s CS).
+
+# Caller responsibilities
+
+`build_top_loci()` reads per-fit metadata from `data_x`, `data_y`, and a
+reserved `other_quantities` list. The caller (typically
+`univariate_analysis_pipeline()` and through it the xqtl-protocol
+`susie_twas` block and the transMap finemapping pipeline) supplies:
+
+| Field                          | Used for                            |
+|--------------------------------|-------------------------------------|
+| `data_y`                       | `n` and `gene` (column name)        |
+| `other_quantities$region`      | `grange_start`, `grange_end`        |
+| `other_quantities$condition_id`| `event` composition with `gene`     |
+
+If `other_quantities$region` is missing, `grange_start` and `grange_end`
+remain `NA` in the wrapper's output; transMap's
+`.transmap_finemap_top_loci_table()` then fills them from its own
+analysis-region metadata. If `other_quantities$condition_id` is missing,
+`event` falls back to `NA_character_`.
+
+# Worked example
+
+The example below sketches a typical wrapper run that fits both `susie`
+and `susie_inf` and shows that the final `top_loci` has one row per
+`(variant, gene, method, cs_membership)`.
+
+```{r setup, eval = FALSE}
+library(pecotmr)
+library(susieR)
+
+set.seed(42)
+n <- 300; p <- 50
+X <- matrix(rnorm(n * p), n, p)
+colnames(X) <- sprintf("chr1:%d:G:A", seq_len(p))
+beta <- rep(0, p); beta[c(10, 30)] <- 1.5
+y <- as.numeric(X %*% beta) + rnorm(n, sd = 0.5)
+Y <- matrix(y, ncol = 1, dimnames = list(NULL, "ENSG00000179403"))
+
+# fit_susie_inf_then_susie() is a pecotmr helper that returns both fits.
+fits <- fit_susie_inf_then_susie(X, y)
+
+# Caller-supplied per-fit metadata flows through other_quantities. The two
+# reserved subfields are documented contract for the unified export only;
+# other_quantities itself remains a free-form list (e.g. dropped_samples
+# stays accepted).
+oq <- list(
+  region       = "chr1:1-50",
+  condition_id = "ctx_eQTL"
+)
+
+post <- postprocess_finemapping_fits(
+  fits, data_x = X, data_y = Y,
+  coverage = 0.95, secondary_coverage = c(0.7, 0.5),
+  other_quantities = oq
+)
+
+out <- format_finemapping_output(post, primary_method = "susie")
+tl  <- out$top_loci
+
+# tl is the 22-column unified table. Verify the column order and the
+# per-method CS strings.
+stopifnot(identical(names(tl), c(
+  "#chr", "start", "end", "a1", "a2",
+  "variant", "gene", "event",
+  "n", "maf", "beta", "se",
+  "pip", "posterior_effect_mean", "posterior_effect_se",
+  "cs_95", "cs_70", "cs_50", "cs_95_purity",
+  "method", "grange_start", "grange_end"
+)))
+stopifnot(setequal(unique(tl$method), c("susie", "susie_inf")))
+stopifnot(all(grepl("^(susie|susie_inf)_\\d+$",
+                    c(tl$cs_95, tl$cs_70, tl$cs_50))))
+```
+
+A few things to notice in the worked example:
+
+- `out` is what xqtl-protocol / transMap callers receive. The previously
+  exposed `out$top_loci_long` field is gone; `out$top_loci` is the only
+  top-loci field.
+- `tl$method` is one column with multiple values (`"susie"`,
+  `"susie_inf"`); there are no per-method `pip_<method>` columns. To
+  inspect a single method's rows, filter on `tl$method`.
+- `tl$cs_95` etc. are character columns. If you need an integer CS index
+  for a particular method, strip the prefix:
+  `as.integer(sub("^.*_", "", tl$cs_95[tl$method == "susie"]))`.
+- A row whose `cs_95` is `"susie_0"` was retained by the PIP cutoff but is
+  not in any 0.95-coverage CS. Such rows have `cs_95_purity == 0` by
+  construction.
+
+# Downstream contract
+
+`format_finemapping_output()` exposes exactly one top-loci field,
+`top_loci`, with the 22 columns above. The previously exposed
+`top_loci_long` and wide `top_loci` (with `pip_<method>` /
+`CS_<cov>_<method>` columns) and the standalone `top_loci_export` are all
+removed. Non-top-loci fields (`susie_result_trimmed`, `variant_names`,
+`context_names`, `analysis_script`, ...) keep their previous semantics.
+
+The `FineMappingResult` S4 slot still satisfies its legacy validity
+check (it expects `variant_id` and `method` columns); `susie_wrapper.R`
+projects the new `top_loci` into a backward-compatible shape only for the
+S4 slot, so `AllClasses.R`, `AllMethods.R`, and `vcf_writer.R` do not need
+to change. End-user consumers should read the 22-column `top_loci`
+returned by `format_finemapping_output()`, not the slot.

From 784e6971a4d0fb4799eb07bfa1e332f473d51895 Mon Sep 17 00:00:00 2001
From: xueweic <xueweic@users.noreply.github.com>
Date: Sat, 30 May 2026 15:20:01 +0000
Subject: [PATCH 08/14] Update documentation

---
 man/build_top_loci.Rd               | 82 +++++++++++------------------
 man/format_finemapping_output.Rd    | 10 ++--
 man/postprocess_finemapping_fits.Rd |  8 ++-
 3 files changed, 39 insertions(+), 61 deletions(-)

diff --git a/man/build_top_loci.Rd b/man/build_top_loci.Rd
index ef2024ba..dd76e581 100644
--- a/man/build_top_loci.Rd
+++ b/man/build_top_loci.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/susie_wrapper.R
 \name{build_top_loci}
 \alias{build_top_loci}
-\title{Build the unified single top-loci table for one fit and one method.}
+\title{Build the unified top-loci table for one fit and one method.}
 \usage{
 build_top_loci(
   fit,
@@ -18,76 +18,58 @@ build_top_loci(
 )
 }
 \arguments{
-\item{fit}{A fitted SuSiE-family object (must expose \code{alpha},
+\item{fit}{Fitted SuSiE-family object (must expose \code{alpha},
 \code{mu}, \code{mu2}, \code{pip}).}
 
-\item{cs_tables}{A list of CS tables (one per coverage), as produced by
+\item{cs_tables}{List of CS tables (one per coverage) from
 \code{compute_cs_tables()}.}
 
-\item{variant_names}{Character vector of variant IDs in
-\code{chr:pos:A2:A1} form, length equal to the number of variants in the
-fit. Used to construct \code{variant}, \code{#chr}, \code{start},
-\code{end}, \code{a1}, \code{a2}.}
+\item{variant_names}{Character vector of variant IDs
+(\code{chr:pos:A2:A1}).}
 
-\item{sumstats}{Optional marginal-association summary statistics
-(\code{betahat}, \code{sebetahat}) used to fill \code{beta} and \code{se}.}
+\item{sumstats}{Optional marginal-association summary (\code{betahat},
+\code{sebetahat}) filling \code{beta} / \code{se}.}
 
 \item{maf}{Optional numeric vector of minor-allele frequencies.}
 
-\item{method}{Method name (e.g. \code{"susie"}, \code{"susie_inf"}). Used
-to construct the per-method \code{"<method>_<cs_index>"} strings and the
-\code{method} column. Required.}
+\item{method}{Method name (e.g. \code{"susie"}, \code{"susie_inf"}). Required.}
 
 \item{signal_cutoff}{PIP cutoff for retaining PIP-only (non-CS) variants.}
 
-\item{data_x}{Optional regional genotype matrix; used only for sample-count
-shape checks.}
+\item{data_x}{Optional regional genotype matrix.}
 
 \item{data_y}{Optional regional phenotype matrix; \code{nrow(data_y)} fills
 \code{n}, \code{colnames(data_y)[1]} fills \code{gene}.}
 
-\item{other_quantities}{Optional list with reserved subfields
-\code{region} (e.g. \code{"chr1:100-200"}) and \code{condition_id}
-(e.g. \code{"Ast_DeJager_eQTL"}); used to fill \code{grange_start},
-\code{grange_end}, and to compose \code{event} as
-\code{paste(condition_id, gene, sep = "_")}. Missing subfields are
-filled with \code{NA} rather than dropped.}
+\item{other_quantities}{Optional list with reserved subfields \code{region}
+(filling \code{grange_start} / \code{grange_end}) and \code{condition_id}
+(composing \code{event} as \code{paste(condition_id, gene, sep = "_")}).}
 }
 \value{
-A data frame in the fixed 22-column unified \code{top_loci} shape
-  for this one fit and one method. Returns an empty data frame with the
-  correct columns and dtypes if there is nothing to retain.
+A data frame in the fixed 22-column shape for this fit and method,
+  or an empty data frame if nothing is retained.
 }
 \description{
-Produces the per-fit, per-method contribution to the unified \code{top_loci}
-table in the fixed 22-column shape. The outer
-\code{postprocess_finemapping_fits()} loop calls this once per method per fit
-and row-binds the results into the single final \code{top_loci} table that is
-exposed by \code{format_finemapping_output()}.
+Returns the per-fit, per-method contribution to the unified \code{top_loci}
+table in the fixed 22-column shape. \code{postprocess_finemapping_fits()}
+calls this once per method per fit and row-binds the results into the
+single \code{top_loci} returned by \code{format_finemapping_output()}.
 }
 \details{
-This function replaces the previous \code{build_top_loci_long()},
-\code{build_top_loci_wide()}, and \code{build_top_loci_export()} trio.
-There is no separately exposed long or wide table.
+Output columns, in order: \code{#chr}, \code{start}, \code{end}, \code{a1},
+\code{a2}, \code{variant}, \code{gene}, \code{event}, \code{n}, \code{maf},
+\code{beta}, \code{se}, \code{pip}, \code{posterior_effect_mean},
+\code{posterior_effect_se}, \code{cs_95}, \code{cs_70}, \code{cs_50},
+\code{cs_95_purity}, \code{method}, \code{grange_start}, \code{grange_end}.
 
-The output column order is exactly (22 columns):
-\code{#chr}, \code{start}, \code{end}, \code{a1}, \code{a2},
-\code{variant}, \code{gene}, \code{event},
-\code{n}, \code{maf}, \code{beta}, \code{se},
-\code{pip}, \code{posterior_effect_mean}, \code{posterior_effect_se},
-\code{cs_95}, \code{cs_70}, \code{cs_50}, \code{cs_95_purity},
-\code{method}, \code{grange_start}, \code{grange_end}.
+\code{cs_95} / \code{cs_70} / \code{cs_50} are character strings of the
+form \code{"<method>_<cs_index>"} where each method numbers credible sets
+independently from 1. Variants retained by the PIP cutoff but not in any
+credible set at a coverage carry \code{"<method>_0"}. \code{cs_95_purity}
+is the 0.95-coverage purity for the row's \code{(method, cs_95)}; rows
+whose \code{cs_95} is \code{"<method>_0"} carry \code{0}.
 
-The \code{cs_95}, \code{cs_70}, \code{cs_50} columns are character strings of
-the form \code{"<method>_<cs_index>"} where each method numbers its credible
-sets independently starting at 1. Variants retained by the PIP cutoff but not
-assigned to any credible set at the given coverage use \code{"<method>_0"}.
-\code{cs_95_purity} is the 0.95-coverage credible-set purity for the row's
-\code{(method, cs_95)}; rows whose \code{cs_95} is \code{"<method>_0"} carry
-\code{cs_95_purity = 0}.
-
-Row uniqueness inside this function's output is one row per
-\code{(variant, gene, cs_membership)} at the given \code{method}. Overlapping
-credible-set membership for the same method produces one row per CS, so the
-overlapping-CS contract is preserved.
+Row uniqueness is \code{(variant, gene, cs_membership)} at the given
+\code{method}; overlapping CS within the same method produces one row per
+CS.
 }
diff --git a/man/format_finemapping_output.Rd b/man/format_finemapping_output.Rd
index 7c48d25c..95e290c4 100644
--- a/man/format_finemapping_output.Rd
+++ b/man/format_finemapping_output.Rd
@@ -13,13 +13,11 @@ format_finemapping_output(post, primary_method)
 }
 \value{
 A list with root-level fields including \code{variant_names},
-  \code{susie_result_trimmed}, and the single unified \code{top_loci}
-  22-column table.
+  \code{susie_result_trimmed}, and \code{top_loci}.
 }
 \description{
 Converts method-aware fine-mapping post-processing output into the root-level
-fields consumed by protocol RDS files. The single top-loci output is the
-\code{top_loci} field (the 22-column unified table); there is no
-\code{top_loci_long}, no wide-format \code{top_loci}, and no
-\code{top_loci_export}.
+fields consumed by protocol RDS files. Exposes the single 22-column unified
+\code{top_loci} table alongside \code{susie_result_trimmed},
+\code{variant_names}, and method-specific intermediates.
 }
diff --git a/man/postprocess_finemapping_fits.Rd b/man/postprocess_finemapping_fits.Rd
index bbd7cec1..558d0ccb 100644
--- a/man/postprocess_finemapping_fits.Rd
+++ b/man/postprocess_finemapping_fits.Rd
@@ -50,11 +50,9 @@ input used for credible-set purity and correlations.}
 \value{
 A list with \code{finemapping_results} (per-method post-processed
   objects, each carrying a trimmed fit and method-specific intermediates)
-  and the single unified \code{top_loci} table in the fixed 22-column
-  shape (see \code{\link{build_top_loci}}). Per-method contributions are
-  produced by \code{build_top_loci()} once per method and row-bound into
-  this single \code{top_loci}. There is no separately exposed
-  \code{top_loci_long} or wide-format \code{top_loci}.
+  and a single unified \code{top_loci} table in the fixed 22-column shape
+  (see \code{\link{build_top_loci}}). Per-method contributions are
+  row-bound into \code{top_loci} by an outer method for-loop.
 }
 \description{
 Applies method-aware post-processing to one or more SuSiE-family fits and

From b71943816e9e1d33354f601970b5d2ae7814ccc3 Mon Sep 17 00:00:00 2001
From: xuewei cao <36172337+xueweic@users.noreply.github.com>
Date: Sat, 30 May 2026 10:43:22 -0500
Subject: [PATCH 09/14] Update test_susie_wrapper.R

---
 tests/testthat/test_susie_wrapper.R | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/tests/testthat/test_susie_wrapper.R b/tests/testthat/test_susie_wrapper.R
index fd9eb70c..dee46356 100644
--- a/tests/testthat/test_susie_wrapper.R
+++ b/tests/testthat/test_susie_wrapper.R
@@ -258,9 +258,10 @@ test_that("susie_rss_pipeline runs with single_effect method", {
   expect_true(is.list(result))
   expect_true("variant_names" %in% names(result))
   expect_true("susie_result_trimmed" %in% names(result))
-  if (!is.null(result$top_loci)) {
-    expect_true("pip_single_effect" %in% names(result$top_loci))
-    expect_true("CS_95_single_effect" %in% names(result$top_loci))
+  if (!is.null(result$top_loci) && nrow(result$top_loci) > 0) {
+    expect_true("pip"   %in% names(result$top_loci))
+    expect_true("cs_95" %in% names(result$top_loci))
+    expect_true(all(result$top_loci$method == "single_effect"))
   }
   # PIPs should be numeric, in [0,1], and sum to at most 1 (L=1)
   pip <- result$susie_result_trimmed$pip
@@ -293,9 +294,10 @@ test_that("susie_rss_pipeline runs with bayesian_conditional_regression", {
   )
   expect_true(is.list(result))
   expect_true("susie_result_trimmed" %in% names(result))
-  if (!is.null(result$top_loci)) {
-    expect_true("pip_bayesian_conditional_regression" %in% names(result$top_loci))
-    expect_true("CS_95_bayesian_conditional_regression" %in% names(result$top_loci))
+  if (!is.null(result$top_loci) && nrow(result$top_loci) > 0) {
+    expect_true("pip"   %in% names(result$top_loci))
+    expect_true("cs_95" %in% names(result$top_loci))
+    expect_true(all(result$top_loci$method == "bayesian_conditional_regression"))
   }
   pip <- result$susie_result_trimmed$pip
   expect_true(is.numeric(pip))

From f253d5872af7a2e5ebdd9c2ee0ac20db1c5a4c6a Mon Sep 17 00:00:00 2001
From: xuewei cao <36172337+xueweic@users.noreply.github.com>
Date: Sat, 30 May 2026 12:44:16 -0500
Subject: [PATCH 10/14] change fit_susie_inf to add_susie_inf

---
 R/univariate_pipeline.R                   | 14 +++++++-------
 man/univariate_analysis_pipeline.Rd       |  4 ++--
 tests/testthat/test_univariate_pipeline.R |  2 +-
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R
index 3339cafc..b891c4b2 100644
--- a/R/univariate_pipeline.R
+++ b/R/univariate_pipeline.R
@@ -25,7 +25,7 @@
 #' @param finemapping_extra_opts Additional options passed to \code{susieR::susie()}.
 #'   SuSiE-inf is always fitted with \code{refine = FALSE}; the ordinary SuSiE
 #'   fit keeps these options and is initialized with \code{model_init}.
-#' @param fit_susie_inf Whether to fit SuSiE-inf before ordinary SuSiE. Default
+#' @param add_susie_inf Whether to fit SuSiE-inf before ordinary SuSiE. Default
 #'   is TRUE for existing pipeline compatibility. If FALSE, ordinary SuSiE is
 #'   fitted directly and SuSiE-inf fitted objects/results are not returned.
 #' @param twas_weights Whether to compute TWAS weights. Default is TRUE.
@@ -61,7 +61,7 @@ univariate_analysis_pipeline <- function(
     coverage = c(0.95, 0.7, 0.5),
     min_abs_corr = 0.8,
     finemapping_extra_opts = list(refine = TRUE),
-    fit_susie_inf = TRUE,
+    add_susie_inf = TRUE,
     # TWAS weights and CV for TWAS weights
     twas_weights = TRUE,
     sample_partition = NULL,
@@ -79,11 +79,11 @@ univariate_analysis_pipeline <- function(
   if (!is.numeric(Y_scalar) || length(Y_scalar) != 1) stop("Y_scalar must be a numeric scalar")
   if (!is.numeric(L) || L <= 0) stop("L must be a positive integer")
   if (!is.null(L_greedy) && (!is.numeric(L_greedy) || L_greedy <= 0)) stop("L_greedy must be NULL or a positive integer")
-  if (!is.logical(fit_susie_inf) || length(fit_susie_inf) != 1 || is.na(fit_susie_inf)) {
-    stop("fit_susie_inf must be TRUE or FALSE")
+  if (!is.logical(add_susie_inf) || length(add_susie_inf) != 1 || is.na(add_susie_inf)) {
+    stop("add_susie_inf must be TRUE or FALSE")
   }
-  if (!isTRUE(fit_susie_inf) && isTRUE(twas_weights)) {
-    stop("fit_susie_inf = FALSE is not compatible with twas_weights = TRUE")
+  if (!isTRUE(add_susie_inf) && isTRUE(twas_weights)) {
+    stop("add_susie_inf = FALSE is not compatible with twas_weights = TRUE")
   }
 
   # Initial PIP check
@@ -126,7 +126,7 @@ univariate_analysis_pipeline <- function(
     finemapping_extra_opts,
     list(L = L, L_greedy = L_greedy, coverage = coverage[1])
   )
-  if (isTRUE(fit_susie_inf)) {
+  if (isTRUE(add_susie_inf)) {
     message("Fitting SuSiE-inf model on input data ...")
     message("Fitting SuSiE model initialized by SuSiE-inf ...")
     fitted_models <- fit_susie_inf_then_susie(
diff --git a/man/univariate_analysis_pipeline.Rd b/man/univariate_analysis_pipeline.Rd
index 2f54b4b5..090de230 100644
--- a/man/univariate_analysis_pipeline.Rd
+++ b/man/univariate_analysis_pipeline.Rd
@@ -23,7 +23,7 @@ univariate_analysis_pipeline(
   coverage = c(0.95, 0.7, 0.5),
   min_abs_corr = 0.8,
   finemapping_extra_opts = list(refine = TRUE),
-  fit_susie_inf = TRUE,
+  add_susie_inf = TRUE,
   twas_weights = TRUE,
   sample_partition = NULL,
   max_cv_variants = -1,
@@ -72,7 +72,7 @@ which is stricter than the susieR default of 0.5.}
 SuSiE-inf is always fitted with \code{refine = FALSE}; the ordinary SuSiE
 fit keeps these options and is initialized with \code{model_init}.}
 
-\item{fit_susie_inf}{Whether to fit SuSiE-inf before ordinary SuSiE. Default
+\item{add_susie_inf}{Whether to fit SuSiE-inf before ordinary SuSiE. Default
 is TRUE for existing pipeline compatibility. If FALSE, ordinary SuSiE is
 fitted directly and SuSiE-inf fitted objects/results are not returned.}
 
diff --git a/tests/testthat/test_univariate_pipeline.R b/tests/testthat/test_univariate_pipeline.R
index a8dacb99..f974cf94 100644
--- a/tests/testthat/test_univariate_pipeline.R
+++ b/tests/testthat/test_univariate_pipeline.R
@@ -640,7 +640,7 @@ test_that("uap: ordinary susie can run without susie-inf initialization", {
 
   result <- univariate_analysis_pipeline(
     X = inp$X, Y = inp$Y, maf = inp$maf,
-    twas_weights = FALSE, fit_susie_inf = FALSE,
+    twas_weights = FALSE, add_susie_inf = FALSE,
     L = 15, L_greedy = 7
   )
   expect_length(captured_calls, 1)

From 53a1495236336a6de182261bc6f7c5924d83d581 Mon Sep 17 00:00:00 2001
From: xuewei cao <36172337+xueweic@users.noreply.github.com>
Date: Sat, 30 May 2026 14:47:22 -0500
Subject: [PATCH 11/14] add region to pecotmr and remove from other quantities

---
 R/susie_wrapper.R                   | 16 +++++----
 R/univariate_pipeline.R             |  5 ++-
 tests/testthat/test_susie_wrapper.R | 14 ++++----
 vignettes/susie_wrapper.Rmd         | 54 ++++++++++++++---------------
 4 files changed, 49 insertions(+), 40 deletions(-)

diff --git a/R/susie_wrapper.R b/R/susie_wrapper.R
index 03290976..357e9c25 100644
--- a/R/susie_wrapper.R
+++ b/R/susie_wrapper.R
@@ -180,6 +180,7 @@ postprocess_finemapping_fits <- function(fits, data_x, data_y = NULL,
                                          secondary_coverage = c(0.7, 0.5),
                                          signal_cutoff = 0.1,
                                          other_quantities = NULL,
+                                         region = NULL,
                                          prior_eff_tol = 1e-9,
                                          min_abs_corr = 0.8) {
   fits <- fits[!vapply(fits, is.null, logical(1))]
@@ -198,6 +199,7 @@ postprocess_finemapping_fits <- function(fits, data_x, data_y = NULL,
       X_scalar = X_scalar, y_scalar = y_scalar, maf = maf,
       coverage = coverage, secondary_coverage = secondary_coverage,
       signal_cutoff = signal_cutoff, other_quantities = other_quantities,
+      region = region,
       prior_eff_tol = prior_eff_tol, min_abs_corr = min_abs_corr
     )
   })
@@ -257,6 +259,7 @@ postprocess_finemapping_fit.susiF <- function(fit, method = "fsusie", ...) {
                                                 secondary_coverage = c(0.7, 0.5),
                                                 signal_cutoff = 0.1,
                                                 other_quantities = NULL,
+                                                region = NULL,
                                                 prior_eff_tol = 1e-9,
                                                 min_abs_corr = 0.8,
                                                 cs_input = c("X", "Xcorr", "fsusie")) {
@@ -272,7 +275,8 @@ postprocess_finemapping_fit.susiF <- function(fit, method = "fsusie", ...) {
   top_loci <- build_top_loci(
     fit, cs_tables, variant_names = variant_names, sumstats = sumstats,
     maf = maf, method = method, signal_cutoff = signal_cutoff,
-    data_x = data_x, data_y = data_y, other_quantities = other_quantities
+    data_x = data_x, data_y = data_y, other_quantities = other_quantities,
+    region = region
   )
 
   trimmed <- trim_finemapping_fit(fit, effect_idx, method, cs_tables)
@@ -457,16 +461,16 @@ compute_cs_table <- function(fit, data_x, coverage, cs_input = c("X", "Xcorr", "
 #' @param data_x Optional regional genotype matrix.
 #' @param data_y Optional regional phenotype matrix; \code{nrow(data_y)} fills
 #'   \code{n}, \code{colnames(data_y)[1]} fills \code{gene}.
-#' @param other_quantities Optional list with reserved subfields \code{region}
-#'   (filling \code{grange_start} / \code{grange_end}) and \code{condition_id}
-#'   (composing \code{event} as \code{paste(condition_id, gene, sep = "_")}).
+#' @param other_quantities Optional list. Default is NULL.
+#' @param region Optional \code{"chr:start-end"} string. Default is NULL.
 #' @return A data frame in the fixed 22-column shape for this fit and method,
 #'   or an empty data frame if nothing is retained.
 #' @export
 build_top_loci <- function(fit, cs_tables, variant_names, sumstats = NULL,
                            maf = NULL, method, signal_cutoff = 0.1,
                            data_x = NULL, data_y = NULL,
-                           other_quantities = NULL) {
+                           other_quantities = NULL,
+                           region = NULL) {
   if (missing(method) || is.null(method) ||
       length(method) != 1L || is.na(method) || !nzchar(method)) {
     stop("build_top_loci: `method` is required (e.g. \"susie\", \"susie_inf\").")
@@ -485,7 +489,7 @@ build_top_loci <- function(fit, cs_tables, variant_names, sumstats = NULL,
                    !is.na(fit_gene) && nzchar(fit_gene)) {
     paste(other_quantities$condition_id, fit_gene, sep = "_")
   } else NA_character_
-  grange <- .parse_grange(other_quantities$region)
+  grange <- .parse_grange(region)
 
   # Per-variant posterior effect / SE, computed once across all variants.
   alpha <- as.matrix(fit$alpha)
diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R
index b891c4b2..b09761e0 100644
--- a/R/univariate_pipeline.R
+++ b/R/univariate_pipeline.R
@@ -11,6 +11,7 @@
 #' @param maf A vector of minor allele frequencies for each variant in X.
 #' @param X_variance Optional variance of X. Default is NULL.
 #' @param other_quantities A list of other quantities to be carried into fine-mapping post-processing. Default is an empty list.
+#' @param region Optional \code{"chr:start-end"} string for the analysis region. Default is NULL.
 #' @param imiss_cutoff Individual missingness cutoff. Default is 1.0.
 #' @param maf_cutoff Minor allele frequency cutoff. Default is NULL.
 #' @param xvar_cutoff Variance cutoff for X. Default is 0.05.
@@ -47,6 +48,7 @@ univariate_analysis_pipeline <- function(
     Y_scalar = 1,
     X_variance = NULL,
     other_quantities = list(),
+    region = NULL,
     # filters
     imiss_cutoff = 1.0,
     maf_cutoff = NULL,
@@ -158,7 +160,8 @@ univariate_analysis_pipeline <- function(
     secondary_coverage = if (length(coverage) > 1) coverage[-1] else NULL,
     signal_cutoff = signal_cutoff,
     min_abs_corr = min_abs_corr,
-    other_quantities = other_quantities
+    other_quantities = other_quantities,
+    region = region
   )
   res <- c(res, format_finemapping_output(susie_post, primary_method = "susie"))
   if (!is.null(susie_post$finemapping_results$susie_inf)) {
diff --git a/tests/testthat/test_susie_wrapper.R b/tests/testthat/test_susie_wrapper.R
index dee46356..c0df1b9e 100644
--- a/tests/testthat/test_susie_wrapper.R
+++ b/tests/testthat/test_susie_wrapper.R
@@ -890,14 +890,16 @@ if (!exists(".make_univariate_data", inherits = FALSE)) {
 
 .run_build_top_loci <- function(inp, method = "susie", signal_cutoff = 0.05,
                                 sumstats = NULL, maf = NULL,
-                                other_quantities = NULL) {
+                                other_quantities = NULL,
+                                region = NULL) {
   build_top_loci(
     fit = inp$fit, cs_tables = inp$cs_tables,
     variant_names = inp$variant_names,
     sumstats = sumstats, maf = maf,
     method = method, signal_cutoff = signal_cutoff,
     data_x = inp$data_x, data_y = inp$data_y,
-    other_quantities = other_quantities
+    other_quantities = other_quantities,
+    region = region
   )
 }
 
@@ -940,13 +942,13 @@ test_that("build_top_loci emits 22 columns in the fixed order on a non-empty fit
                                             "0.7"  = list(c(1L, 2L)),
                                             "0.5"  = list(c(1L, 2L))),
                           n_samples = 419, n_variants = 11332)
-  other_q <- list(region = "chr10:10823338-14348298",
-                  condition_id = "Ast_DeJager_eQTL")
+  other_q <- list(condition_id = "Ast_DeJager_eQTL")
   out <- .run_build_top_loci(inp, method = "susie",
                              sumstats = list(betahat = c(0.2, -0.1),
                                              sebetahat = c(0.05, 0.04)),
                              maf = c(0.10, 0.25),
-                             other_quantities = other_q)
+                             other_quantities = other_q,
+                             region = "chr10:10823338-14348298")
   expect_equal(names(out), .UNIFIED_TOP_LOCI_COLS)
   expect_equal(unique(out$gene), "ENSG00000179403")
   expect_equal(unique(out$event), "Ast_DeJager_eQTL_ENSG00000179403")
@@ -1161,7 +1163,7 @@ test_that("non-top-loci wrapper fields (susie_result_trimmed, variant_names) are
   expect_true("variant_names" %in% names(out))
 })
 
-test_that("missing other_quantities$region produces NA grange columns rather than silent omission", {
+test_that("missing region produces NA grange columns rather than silent omission", {
   variant_ids <- c("chr1:100:A:G")
   cs_at_cov <- list("0.95" = list(1L),
                     "0.7"  = list(1L),
diff --git a/vignettes/susie_wrapper.Rmd b/vignettes/susie_wrapper.Rmd
index 6bfac480..51e9ba8b 100644
--- a/vignettes/susie_wrapper.Rmd
+++ b/vignettes/susie_wrapper.Rmd
@@ -77,8 +77,8 @@ The unified `top_loci` table has exactly these columns, in this order:
 | 18 | `cs_50`                 | character | per-row CS membership at coverage 0.50                           |
 | 19 | `cs_95_purity`          | numeric   | 0.95-coverage CS purity for this row (see below)                 |
 | 20 | `method`                | character | `"susie"`, `"susie_inf"`, ...                                    |
-| 21 | `grange_start`          | integer   | parsed from `other_quantities$region`                            |
-| 22 | `grange_end`            | integer   | parsed from `other_quantities$region`                            |
+| 21 | `grange_start`          | integer   | parsed from top-level `region` arg                               |
+| 22 | `grange_end`            | integer   | parsed from top-level `region` arg                               |
 
 There are no `pip_<method>` or `CS_<cov>_<method>` wide-format columns, no
 `top_loci_long` column set (`variant_id` / `coverage` / `cs`), and no
@@ -140,22 +140,25 @@ in `susie`'s CS and `susie_inf`'s CS).
 
 # Caller responsibilities
 
-`build_top_loci()` reads per-fit metadata from `data_x`, `data_y`, and a
-reserved `other_quantities` list. The caller (typically
-`univariate_analysis_pipeline()` and through it the xqtl-protocol
-`susie_twas` block and the transMap finemapping pipeline) supplies:
-
-| Field                          | Used for                            |
-|--------------------------------|-------------------------------------|
-| `data_y`                       | `n` and `gene` (column name)        |
-| `other_quantities$region`      | `grange_start`, `grange_end`        |
-| `other_quantities$condition_id`| `event` composition with `gene`     |
-
-If `other_quantities$region` is missing, `grange_start` and `grange_end`
-remain `NA` in the wrapper's output; transMap's
-`.transmap_finemap_top_loci_table()` then fills them from its own
-analysis-region metadata. If `other_quantities$condition_id` is missing,
-`event` falls back to `NA_character_`.
+`build_top_loci()` reads per-fit metadata from `data_x`, `data_y`, a
+reserved `other_quantities` list, and a top-level `region` argument. The
+caller (typically `univariate_analysis_pipeline()` and through it the
+xqtl-protocol `susie_twas` block and the transMap finemapping pipeline)
+supplies:
+
+| Field                            | Used for                            |
+|----------------------------------|-------------------------------------|
+| `data_y`                         | `n` and `gene` (column name)        |
+| `region` (top-level)             | `grange_start`, `grange_end` (parsed by `parse_region()`) |
+| `other_quantities$condition_id`  | `event` composition with `gene`     |
+
+`region` is a top-level `"chr:start-end"` string argument of
+`univariate_analysis_pipeline()` (and `postprocess_finemapping_fits()` /
+`build_top_loci()`). When `region` is `NULL`, `grange_start` and
+`grange_end` are `NA`. If `other_quantities$condition_id` is missing,
+`event` falls back to `NA_character_`. `other_quantities` remains a
+free-form list (e.g. `dropped_samples` stays accepted); only
+`condition_id` is consumed by the unified export.
 
 # Worked example
 
@@ -178,19 +181,16 @@ Y <- matrix(y, ncol = 1, dimnames = list(NULL, "ENSG00000179403"))
 # fit_susie_inf_then_susie() is a pecotmr helper that returns both fits.
 fits <- fit_susie_inf_then_susie(X, y)
 
-# Caller-supplied per-fit metadata flows through other_quantities. The two
-# reserved subfields are documented contract for the unified export only;
-# other_quantities itself remains a free-form list (e.g. dropped_samples
-# stays accepted).
-oq <- list(
-  region       = "chr1:1-50",
-  condition_id = "ctx_eQTL"
-)
+# Caller-supplied per-fit metadata. `condition_id` flows through
+# other_quantities (which remains a free-form list); the analysis region
+# flows through `region`, a top-level argument.
+oq <- list(condition_id = "ctx_eQTL")
 
 post <- postprocess_finemapping_fits(
   fits, data_x = X, data_y = Y,
   coverage = 0.95, secondary_coverage = c(0.7, 0.5),
-  other_quantities = oq
+  other_quantities = oq,
+  region           = "chr1:1-50"
 )
 
 out <- format_finemapping_output(post, primary_method = "susie")

From c50f7dc0004c55bcc4c186dbc60233b0393ac6b4 Mon Sep 17 00:00:00 2001
From: xueweic <xueweic@users.noreply.github.com>
Date: Sat, 30 May 2026 19:48:42 +0000
Subject: [PATCH 12/14] Update documentation

---
 man/build_top_loci.Rd               | 9 +++++----
 man/postprocess_finemapping_fits.Rd | 1 +
 man/univariate_analysis_pipeline.Rd | 3 +++
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/man/build_top_loci.Rd b/man/build_top_loci.Rd
index dd76e581..f44ae5a3 100644
--- a/man/build_top_loci.Rd
+++ b/man/build_top_loci.Rd
@@ -14,7 +14,8 @@ build_top_loci(
   signal_cutoff = 0.1,
   data_x = NULL,
   data_y = NULL,
-  other_quantities = NULL
+  other_quantities = NULL,
+  region = NULL
 )
 }
 \arguments{
@@ -41,9 +42,9 @@ build_top_loci(
 \item{data_y}{Optional regional phenotype matrix; \code{nrow(data_y)} fills
 \code{n}, \code{colnames(data_y)[1]} fills \code{gene}.}
 
-\item{other_quantities}{Optional list with reserved subfields \code{region}
-(filling \code{grange_start} / \code{grange_end}) and \code{condition_id}
-(composing \code{event} as \code{paste(condition_id, gene, sep = "_")}).}
+\item{other_quantities}{Optional list. Default is NULL.}
+
+\item{region}{Optional \code{"chr:start-end"} string. Default is NULL.}
 }
 \value{
 A data frame in the fixed 22-column shape for this fit and method,
diff --git a/man/postprocess_finemapping_fits.Rd b/man/postprocess_finemapping_fits.Rd
index 558d0ccb..3f16a70c 100644
--- a/man/postprocess_finemapping_fits.Rd
+++ b/man/postprocess_finemapping_fits.Rd
@@ -15,6 +15,7 @@ postprocess_finemapping_fits(
   secondary_coverage = c(0.7, 0.5),
   signal_cutoff = 0.1,
   other_quantities = NULL,
+  region = NULL,
   prior_eff_tol = 1e-09,
   min_abs_corr = 0.8
 )
diff --git a/man/univariate_analysis_pipeline.Rd b/man/univariate_analysis_pipeline.Rd
index 090de230..06043f9a 100644
--- a/man/univariate_analysis_pipeline.Rd
+++ b/man/univariate_analysis_pipeline.Rd
@@ -12,6 +12,7 @@ univariate_analysis_pipeline(
   Y_scalar = 1,
   X_variance = NULL,
   other_quantities = list(),
+  region = NULL,
   imiss_cutoff = 1,
   maf_cutoff = NULL,
   xvar_cutoff = 0,
@@ -47,6 +48,8 @@ univariate_analysis_pipeline(
 
 \item{other_quantities}{A list of other quantities to be carried into fine-mapping post-processing. Default is an empty list.}
 
+\item{region}{Optional \code{"chr:start-end"} string for the analysis region. Default is NULL.}
+
 \item{imiss_cutoff}{Individual missingness cutoff. Default is 1.0.}
 
 \item{maf_cutoff}{Minor allele frequency cutoff. Default is NULL.}

From 4cde25930d6c49bfb1b0ca8c046969cf050ce27f Mon Sep 17 00:00:00 2001
From: xuewei cao <36172337+xueweic@users.noreply.github.com>
Date: Sun, 31 May 2026 00:38:39 -0400
Subject: [PATCH 13/14] Update multivariate_pipeline.R

---
 R/multivariate_pipeline.R | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/R/multivariate_pipeline.R b/R/multivariate_pipeline.R
index 70dc4f3e..913d9c42 100644
--- a/R/multivariate_pipeline.R
+++ b/R/multivariate_pipeline.R
@@ -71,6 +71,7 @@ multivariate_analysis_pipeline <- function(
     maf,
     X_variance = NULL,
     other_quantities = list(),
+    region = NULL,
     # filters
     imiss_cutoff = 1.0,
     maf_cutoff = 0.01,
@@ -304,7 +305,8 @@ multivariate_analysis_pipeline <- function(
     secondary_coverage = sec_coverage,
     signal_cutoff = signal_cutoff,
     min_abs_corr = min_abs_corr,
-    other_quantities = other_quantities
+    other_quantities = other_quantities,
+    region = region
   )
   res <- c(res, format_finemapping_output(mvsusie_post, primary_method = "mvsusie"))
   res$total_time_elapsed <- proc.time() - st

From 1beec6f8ccfc148afa6e41ca4a542db958e880c9 Mon Sep 17 00:00:00 2001
From: xueweic <xueweic@users.noreply.github.com>
Date: Sun, 31 May 2026 04:39:59 +0000
Subject: [PATCH 14/14] Update documentation

---
 man/multivariate_analysis_pipeline.Rd | 1 +
 1 file changed, 1 insertion(+)

diff --git a/man/multivariate_analysis_pipeline.Rd b/man/multivariate_analysis_pipeline.Rd
index 54ea75f5..fdd504f9 100644
--- a/man/multivariate_analysis_pipeline.Rd
+++ b/man/multivariate_analysis_pipeline.Rd
@@ -10,6 +10,7 @@ multivariate_analysis_pipeline(
   maf,
   X_variance = NULL,
   other_quantities = list(),
+  region = NULL,
   imiss_cutoff = 1,
   maf_cutoff = 0.01,
   xvar_cutoff = 0.01,