Skip to content

Error when imputing with "condition" #10

@Freddsle

Description

@Freddsle

Here:

https://github.com/daisybio/PRONE/blob/86f4db6f9ef0ed30621f877d89810f88322ebee0/R/Imputation.R#L35C6-L35C46

This probably happens because the argument passed to get() is evaluated against the data: `condition` resolves to the column vector dt$condition rather than to the string "condition".

How to reproduce and possibly fix:

# ---- packages ----
library(SummarizedExperiment)
library(dplyr)
library(data.table)

# ---- fake proteomics-like data ----
# Build a small 5-protein x 6-sample intensity matrix with a fixed RNG seed,
# then blank out one protein per condition to mimic condition-wise missingness.
set.seed(1)
genes   <- sprintf("P%05d", 1:5)                               # 5 proteins
samples <- paste0("lab_", rep(c("A", "B"), each = 3), "_",
                  rep(1:3, times = 2))                         # 6 samples

mat <- array(
  rnorm(length(genes) * length(samples)),
  dim      = c(length(genes), length(samples)),
  dimnames = list(genes, samples)
)

# Induce MNAR-by-condition: proteins fully missing within a condition
mat["P00002", startsWith(samples, "lab_A")] <- NA_real_   # missing in all A
mat["P00004", startsWith(samples, "lab_B")] <- NA_real_   # missing in all B

# ---- sample annotation (colData) ----
# One row per sample; row names are the sample names used as assay columns.
cd <- data.frame(
  file      = samples,
  # Strip only the trailing "_<replicate>" part ("lab_A_1" -> "lab_A").
  # Removing everything from the first underscore would leave the constant
  # "lab" for every sample, which carries no information.
  lab       = sub("_[^_]*$", "", samples),
  condition = rep(c("A", "B"), each = 3),
  Repl      = rep(1:3, times = 2),
  row.names = samples,
  stringsAsFactors = FALSE
)
cd$Column <- rownames(cd)   # join key matching colnames(se)

# ---- build SE ----
# Single assay named "raw"; colData rows line up with the matrix columns.
se <- SummarizedExperiment(
  assays  = list(raw = mat),
  colData = cd
)

# ---- reshape to long + join ----
# Name of the assay to reshape.
ain <- "raw"

# as.data.table() discards matrix row names unless told to keep them, so
# rownames() of the result would only be "1", "2", ... . keep.rownames = "ID"
# stores the real protein identifiers in an ID column instead.
dt <- as.data.table(assays(se)[[ain]], keep.rownames = "ID")

# Long format: one row per (protein, sample). variable.factor = FALSE keeps
# the sample name as character so it matches the character join key below.
dt <- melt(
  dt,
  id.vars         = "ID",
  variable.name   = "Column",
  value.name      = "Intensity",
  variable.factor = FALSE
)

# Attach the sample annotation to every measurement.
coldata <- as.data.table(colData(se))
dt <- merge(dt, coldata, by = "Column")

# sanity: keys line up
stopifnot(identical(sort(unique(dt$Column)), sort(colnames(se))))

# ---- column name ----
# The variable deliberately has the same name as the column it refers to;
# that name clash is what triggers the failure reproduced below.
condition <- "condition"
message("Using condition: ", condition)

# ---- 1) Reproduce  ----
# Inside group_by() the columns of dt take precedence over outer variables,
# so `condition` is the whole column vector rather than the string
# "condition", and get() cannot look that up. The failing call is kept as-is;
# the error is caught so the script continues, and `bad` ends up NULL.
bad <- tryCatch({
  dt %>%
    group_by(ID, get(condition)) %>%           # <-- this is the bug
    summarize(NAs = all(is.na(Intensity))) %>%
    filter(NAs) %>%
    distinct(ID) %>%
    pull(ID)
}, error = function(e) {
  # conditionMessage() is the documented accessor for a condition's message.
  message("Expected error: ", conditionMessage(e))
  NULL
})

# ---- 2) Fix: ----
# Turn the string held in `condition` into a symbol and unquote it with !!,
# so the grouping column is resolved from the string before dt's columns can
# shadow the variable.
ok <- dt %>%
  group_by(ID, !!sym(condition)) %>%         # <-- FIX
  summarise(NAs = all(is.na(Intensity)), .groups = "drop") %>%
  filter(NAs) %>%
  pull(ID) %>%
  unique()

# IDs whose intensities are all missing within at least one condition group.
print(ok)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions