From 58d5a93bc29af9867c9800eb48eb5965a90214f9 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 16 Jun 2026 11:08:41 +0000 Subject: [PATCH 1/8] feat: add nf-core-compatible bigWig coverage tracks (#112) Implement bedtools genomecov-equivalent coverage accumulation in the existing BAM streaming pass and write bigWig files via bigtools. Produces nf-core/rnaseq-compatible outputs: - {sample}.bigWig (combined, -split -bg) - {sample}.forward/reverse.bigWig for stranded libraries (-split -du -strand) Validated against bedtools genomecov + UCSC bedClip/bedGraphToBigWig in integration tests. Co-authored-by: Phil Ewels --- CHANGELOG.md | 6 + Cargo.lock | 243 ++++++++++++++++++- Cargo.toml | 4 + src/citations.rs | 11 + src/config.rs | 28 +++ src/main.rs | 38 ++- src/rna/bigwig/accumulator.rs | 400 ++++++++++++++++++++++++++++++++ src/rna/bigwig/mod.rs | 10 + src/rna/bigwig/output.rs | 123 ++++++++++ src/rna/dupradar/counting.rs | 61 +++++ src/rna/featurecounts/output.rs | 1 + src/rna/mod.rs | 1 + tests/integration_test.rs | 242 +++++++++++++++++++ 13 files changed, 1166 insertions(+), 2 deletions(-) create mode 100644 src/rna/bigwig/accumulator.rs create mode 100644 src/rna/bigwig/mod.rs create mode 100644 src/rna/bigwig/output.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 2b67cce..b88ab72 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # RustQC Changelog +## Unreleased + +### Features + +- Add bigWig genome coverage tracks in the existing BAM streaming pass, matching nf-core/rnaseq `bedtools genomecov` + UCSC `bedClip` + `bedGraphToBigWig` output ([#112](https://github.com/seqeralabs/RustQC/issues/112)) + ## [Version 0.2.1](https://github.com/seqeralabs/RustQC/releases/tag/v0.2.1) - 2026-04-09 ### Bug fixes diff --git a/Cargo.lock b/Cargo.lock index da20215..a3844c5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -88,6 +88,37 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "bigtools" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af1b9bbf6596d602e472a23ed5aa5d611fb04f14e7772226fb61c720e806202e" +dependencies = [ + "bincode", + "byteorder", + "byteordered", + "crossbeam-channel", + "crossbeam-utils", + "futures", + "index_list", + "itertools 0.10.5", + "libdeflater", + "serde", + "smallvec", + "tempfile", + "thiserror 1.0.69", + "tokio", +] + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + [[package]] name = "bindgen" version = "0.69.5" @@ -97,7 +128,7 @@ dependencies = [ "bitflags 2.11.0", "cexpr", "clang-sys", - "itertools", + "itertools 0.12.1", "lazy_static", "lazycell", "proc-macro2", @@ -151,6 +182,15 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +[[package]] +name = "byteordered" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbf2cd9424f5ff404aba1959c835cbc448ee8b689b870a9981c76c0fd46280e6" +dependencies = [ + "byteorder", +] + [[package]] name = "bzip2-sys" version = "0.1.13+1.0.8" @@ -373,6 +413,15 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -535,6 +584,22 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "fastrand" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" + [[package]] name = "fdeflate" version = "0.3.7" @@ -653,6 +718,94 @@ dependencies = [ "quick-error", ] +[[package]] +name = "futures" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-executor" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" + +[[package]] +name = "futures-macro" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "slab", +] + [[package]] name = "getrandom" version = "0.2.17" @@ -896,6 +1049,12 @@ dependencies = [ "png", ] +[[package]] +name = "index_list" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30141a73bc8a129ac1ce472e33f45af3e2091d86b3479061b9c2f92fdbe9a28c" + [[package]] name = "indexmap" version = "2.13.0" @@ -927,6 +1086,15 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.12.1" @@ -1016,6 +1184,24 @@ version = "0.2.184" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4af" +[[package]] +name = "libdeflate-sys" +version = "1.25.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72753e0008ea87963d2f0770042d0df7abe51fafbb8dcaf618ac440f2f1fec0a" +dependencies = [ + "cc", +] + +[[package]] +name = "libdeflater" +version = "1.25.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1ee41cf6fb1bb6030dfb59ffb7bc01ab26aade44142084c87f0fc7a1658fe71" +dependencies = [ + "libdeflate-sys", +] + [[package]] name = "libloading" version = "0.8.9" @@ -1054,6 +1240,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bfae20f6b19ad527b550c223fddc3077a547fc70cda94b9b566575423fd303ee" +[[package]] +name = "linux-raw-sys" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" + [[package]] name = "litemap" version = "0.8.1" @@ -1198,6 +1390,12 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + [[package]] name = "pkg-config" version = "0.3.32" @@ -1475,11 +1673,25 @@ dependencies = [ "semver 1.0.27", ] +[[package]] +name = "rustix" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags 2.11.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", +] + [[package]] name = "rustqc" version = "0.2.1" dependencies = [ "anyhow", + "bigtools", "cc", "clap", "coitrees", @@ -1502,6 +1714,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml_ng", + "tokio", ] [[package]] @@ -1605,6 +1818,12 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + [[package]] name = "smallvec" version = "1.15.1" @@ -1658,6 +1877,19 @@ dependencies = [ "syn", ] +[[package]] +name = "tempfile" +version = "3.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" +dependencies = [ + "fastrand", + "getrandom 0.4.2", + "once_cell", + "rustix", + "windows-sys 0.61.2", +] + [[package]] name = "thiserror" version = "1.0.69" @@ -1708,6 +1940,15 @@ dependencies = [ "zerovec", ] +[[package]] +name = "tokio" +version = "1.52.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc7f01b389ac15039e4dc9531aa973a135d7a4135281b12d7c1bc79fd57fffe" +dependencies = [ + "pin-project-lite", +] + [[package]] name = "ttf-parser" version = "0.20.0" diff --git a/Cargo.toml b/Cargo.toml index ebf02f0..b3155e4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -64,6 +64,10 @@ indicatif = "0.17" number_prefix = "0.4" serde_json = "1" +# bigWig writing (bedGraphToBigWig equivalent) +bigtools = { version = "0.5", default-features = false, features = ["write"] } +tokio = { version = "1", features = ["rt-multi-thread"] } + [build-dependencies] cc = "1" diff --git a/src/citations.rs b/src/citations.rs index 316ed4d..08de7a5 100644 --- a/src/citations.rs +++ b/src/citations.rs @@ -64,6 +64,14 @@ const QUALIMAP: Citation = Citation { doi: "10.1093/bioinformatics/bts503", }; +const BIGWIG: Citation = Citation { + heading: "bedtools genomecov (v2.31.1) + UCSC utilities", + description: "RustQC produces nf-core/rnaseq-compatible bigWig coverage tracks (bedtools genomecov, bedClip, bedGraphToBigWig).", + reference: "Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. *Bioinformatics*. 2010;26(6):841-842. Kent WJ, et al. The human genome browser at UCSC. *Genome Res*. 2002;12(6):996-1006.", + url: "https://github.com/arq5x/bedtools2", + doi: "10.1093/bioinformatics/btq033", +}; + fn write_citation(w: &mut impl Write, c: &Citation) -> std::io::Result<()> { writeln!(w, "## {}\n", c.heading)?; writeln!(w, "{}\n", c.description)?; @@ -113,6 +121,9 @@ pub fn write_citations(path: &Path, config: &RnaConfig, version: &str, commit: & if config.qualimap.enabled { write_citation(&mut w, &QUALIMAP)?; } + if config.bigwig.enabled { + write_citation(&mut w, &BIGWIG)?; + } w.flush()?; Ok(()) diff --git a/src/config.rs b/src/config.rs index 4952a1f..62753c5 100644 --- a/src/config.rs +++ b/src/config.rs @@ -165,6 +165,10 @@ pub struct RnaConfig { /// Qualimap RNA-Seq QC configuration. #[serde(default)] pub qualimap: QualimapConfig, + + /// bigWig genome coverage track configuration. + #[serde(default)] + pub bigwig: BigwigConfig, } // ============================================================================ @@ -600,6 +604,30 @@ impl Default for QualimapConfig { } } +/// Configuration for bigWig genome coverage tracks. +/// +/// When enabled, produces nf-core/rnaseq-compatible bigWig files using +/// bedtools genomecov-equivalent parameters (`-split -bg`, plus per-strand +/// `-split -du -strand +/- -bg` for stranded libraries). +/// +/// Example: +/// ```yaml +/// bigwig: +/// enabled: true +/// ``` +#[derive(Debug, Deserialize)] +#[serde(default)] +pub struct BigwigConfig { + /// Whether to generate bigWig coverage tracks. Defaults to true. + pub enabled: bool, +} + +impl Default for BigwigConfig { + fn default() -> Self { + Self { enabled: true } + } +} + /// Configuration for samtools idxstats-compatible output. /// /// When enabled, produces a file matching `samtools idxstats` output format, diff --git a/src/main.rs b/src/main.rs index 4c66c17..772c4f4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,7 +6,8 @@ //! 8 RSeQC-equivalent tools (bam_stat, infer_experiment, read_duplication, //! read_distribution, junction_annotation, junction_saturation, inner_distance, TIN), //! preseq library complexity extrapolation, samtools-compatible outputs -//! (flagstat, idxstats, stats), and Qualimap gene body coverage profiling. +//! (flagstat, idxstats, stats), Qualimap gene body coverage profiling, and +//! nf-core/rnaseq-compatible bigWig coverage tracks. //! Individual tools can be disabled via the YAML config file. mod citations; @@ -1043,6 +1044,7 @@ fn process_single_bam( None }, qualimap_index.as_ref(), + config.bigwig.enabled, Some(&pb), )?; let count_duration = count_start.elapsed(); @@ -1421,6 +1423,40 @@ fn process_single_bam( written_outputs.push(("Qualimap".into(), p)); } + // === bigWig genome coverage tracks === + if let Some(ref gc_result) = count_result.genomecov { + let bw_dir = if params.flat_output { + outdir.to_path_buf() + } else { + outdir.join("bigwig") + }; + + let chrom_sizes: Vec<(String, u64)> = { + let reader = rust_htslib::bam::Reader::from_path(bam_path) + .with_context(|| format!("Failed to open BAM for bigWig chrom sizes: {}", bam_path))?; + let header = reader.header(); + (0..header.target_count()) + .map(|tid| { + let name = String::from_utf8_lossy(header.tid2name(tid)).to_string(); + let len = header.target_len(tid).unwrap_or(0); + (name, len) + }) + .collect() + }; + + let bw_outputs = rna::bigwig::write_bigwig_tracks( + &bw_dir, + &sample_name, + gc_result, + &chrom_sizes, + threads, + )?; + for (label, path) in bw_outputs { + ui.output_item(&label, &path); + written_outputs.push((label, path)); + } + } + // === RSeQC analyses (post-processing of single-pass accumulators) === let rseqc_accums = rseqc_accums.unwrap_or_else(|| { ui.detail("No RSeQC tools enabled, skipping"); diff --git a/src/rna/bigwig/accumulator.rs b/src/rna/bigwig/accumulator.rs new file mode 100644 index 0000000..184f368 --- /dev/null +++ b/src/rna/bigwig/accumulator.rs @@ -0,0 +1,400 @@ +//! Genome coverage accumulation matching `bedtools genomecov`. +//! +//! Replicates nf-core/rnaseq bigWig generation parameters: +//! - Combined track: `-split -bg` +//! - Per-strand tracks (stranded libraries): `-split -du -strand +/- -bg` +//! +//! Coverage uses a difference-array (starts/ends) per chromosome, identical to +//! bedtools' `BedGenomeCoverage` implementation. + +use rust_htslib::bam; +use rust_htslib::bam::record::Cigar; + +use crate::rna::bam_flags::BAM_FREAD2; +use crate::Strandedness; + +/// Strand filter for per-strand coverage tracks. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum StrandFilter { + /// Forward reference strand (`-strand +`). + Plus, + /// Reverse reference strand (`-strand -`). + Minus, +} + +/// Settings for a single coverage track. +#[derive(Debug, Clone, Copy)] +struct TrackSettings { + /// Split reads at splicing blocks (`-split`). + split: bool, + /// Flip second-mate strand for strand-specific libraries (`-du`). + deduplicate_strand: bool, + /// Optional strand filter (`-strand`). + strand: Option, +} + +/// Difference-array endpoint counters for one reference position. +#[derive(Debug, Default, Clone, Copy)] +struct DiffPoint { + starts: u32, + ends: u32, +} + +/// Per-chromosome coverage using bedtools' starts/ends difference array. +#[derive(Debug, Clone)] +pub(crate) struct ChromCoverage { + /// Chromosome length in bases. + size: u64, + /// Per-base start/end counters (length == size). + points: Vec, +} + +impl ChromCoverage { + fn new(size: u64) -> Self { + Self { + size, + points: vec![DiffPoint::default(); size as usize], + } + } + + /// Add coverage for `[start, end]` inclusive, matching bedtools `AddCoverage`. + fn add_interval(&mut self, start: u64, end: u64) { + if self.size == 0 { + return; + } + let chrom_size = self.size; + if start < chrom_size { + self.points[start as usize].starts += 1; + } + if end < chrom_size { + self.points[end as usize].ends += 1; + } else { + self.points[(chrom_size - 1) as usize].ends += 1; + } + } + + fn merge(&mut self, other: &Self) { + debug_assert_eq!(self.size, other.size); + for (a, b) in self.points.iter_mut().zip(other.points.iter()) { + a.starts += b.starts; + a.ends += b.ends; + } + } +} + +/// Accumulator for one coverage track (combined, forward, or reverse). +#[derive(Debug, Default, Clone)] +pub struct TrackAccum { + chroms: Vec<(String, ChromCoverage)>, +} + +impl TrackAccum { + fn new(chroms: &[(String, u64)]) -> Self { + Self { + chroms: chroms + .iter() + .map(|(name, len)| (name.clone(), ChromCoverage::new(*len))) + .collect(), + } + } + + fn chrom_mut(&mut self, chrom: &str) -> Option<&mut ChromCoverage> { + self.chroms + .iter_mut() + .find(|(name, _)| name == chrom) + .map(|(_, cov)| cov) + } + + fn merge(&mut self, other: Self) { + for (name, other_cov) in other.chroms { + if let Some((_, self_cov)) = self.chroms.iter_mut().find(|(n, _)| *n == name) { + self_cov.merge(&other_cov); + } else { + self.chroms.push((name, other_cov)); + } + } + } +} + +/// Bundle of coverage accumulators for all nf-core/rnaseq bigWig tracks. +#[derive(Debug, Clone)] +pub struct GenomeCovAccum { + /// Strand-agnostic combined coverage (`-split -bg`). + pub combined: TrackAccum, + /// Forward-genome-strand coverage (stranded libraries only). + pub forward: Option, + /// Reverse-genome-strand coverage (stranded libraries only). + pub reverse: Option, +} + +impl GenomeCovAccum { + /// Create accumulators for the given chromosomes. + /// + /// Per-strand tracks are only created for stranded libraries. + pub fn new(stranded: Strandedness, chroms: &[(String, u64)]) -> Self { + let (forward, reverse) = if stranded == Strandedness::Unstranded { + (None, None) + } else { + ( + Some(TrackAccum::new(chroms)), + Some(TrackAccum::new(chroms)), + ) + }; + Self { + combined: TrackAccum::new(chroms), + forward, + reverse, + } + } + + /// Merge another worker's accumulators into this one. + pub fn merge(&mut self, other: Self) { + self.combined.merge(other.combined); + if let (Some(ref mut f), Some(o)) = (&mut self.forward, other.forward) { + f.merge(o); + } + if let (Some(ref mut r), Some(o)) = (&mut self.reverse, other.reverse) { + r.merge(o); + } + } + + /// Process a mapped BAM record, updating all enabled tracks. + pub fn process_read(&mut self, record: &bam::Record, chrom: &str, stranded: Strandedness) { + if record.is_unmapped() { + return; + } + + process_track( + &mut self.combined, + record, + chrom, + TrackSettings { + split: true, + deduplicate_strand: false, + strand: None, + }, + ); + + if stranded == Strandedness::Unstranded { + return; + } + + let (fwd_strand, rev_strand) = match stranded { + Strandedness::Forward => (StrandFilter::Plus, StrandFilter::Minus), + Strandedness::Reverse => (StrandFilter::Minus, StrandFilter::Plus), + Strandedness::Unstranded => unreachable!(), + }; + + if let Some(ref mut forward) = self.forward { + process_track( + forward, + record, + chrom, + TrackSettings { + split: true, + deduplicate_strand: true, + strand: Some(fwd_strand), + }, + ); + } + if let Some(ref mut reverse) = self.reverse { + process_track( + reverse, + record, + chrom, + TrackSettings { + split: true, + deduplicate_strand: true, + strand: Some(rev_strand), + }, + ); + } + } +} + +fn process_track( + track: &mut TrackAccum, + record: &bam::Record, + chrom: &str, + settings: TrackSettings, +) { + let Some(cov) = track.chrom_mut(chrom) else { + return; + }; + + let mut is_reverse = record.is_reverse(); + // bedtools `-du`: flip second mate strand so both mates appear on the + // same fragment strand (dUTP/strand-specific PE libraries). + if settings.deduplicate_strand + && record.is_paired() + && !record.is_mate_unmapped() + && record.flags() & BAM_FREAD2 != 0 + { + is_reverse = !is_reverse; + } + + if let Some(filter) = settings.strand { + let want_reverse = filter == StrandFilter::Minus; + if is_reverse != want_reverse { + return; + } + } + + if settings.split { + let mut blocks_buf = Vec::new(); + genomecov_blocks(record, &mut blocks_buf); + for (start, end) in blocks_buf { + // bedtools uses inclusive end; blocks are half-open [start, end). + cov.add_interval(start, end.saturating_sub(1)); + } + } else { + let start = record.pos() as u64; + let end = record.cigar().end_pos() as u64 - 1; + cov.add_interval(start, end); + } +} + +/// Extract alignment blocks for `-split` mode, matching bedtools `GetBamBlocks` +/// with `breakOnDeletionOps=true` (default, no `-ignoreD`). +fn genomecov_blocks(record: &bam::Record, blocks: &mut Vec<(u64, u64)>) { + blocks.clear(); + let mut ref_pos = record.pos() as u64; + + for op in record.cigar().iter() { + match op { + Cigar::Match(len) | Cigar::Equal(len) | Cigar::Diff(len) => { + let len = *len as u64; + blocks.push((ref_pos, ref_pos + len)); + ref_pos += len; + } + Cigar::RefSkip(len) | Cigar::Del(len) => { + ref_pos += *len as u64; + } + Cigar::Ins(_) | Cigar::SoftClip(_) | Cigar::HardClip(_) | Cigar::Pad(_) => {} + } + } +} + +/// Merged genome coverage ready for bedGraph / bigWig output. +#[derive(Debug)] +pub struct GenomeCovResult { + /// Combined strand-agnostic track. + pub combined: TrackAccum, + /// Forward-genome-strand track (None for unstranded libraries). + pub forward: Option, + /// Reverse-genome-strand track (None for unstranded libraries). + pub reverse: Option, +} + +impl From for GenomeCovResult { + fn from(acc: GenomeCovAccum) -> Self { + Self { + combined: acc.combined, + forward: acc.forward, + reverse: acc.reverse, + } + } +} + +/// Convert a chromosome's difference array to bedGraph intervals (depth > 0). +/// +/// Output matches bedtools `ReportChromCoverageBedGraph` with scale=1. +pub(crate) fn chrom_to_bedgraph(chrom: &str, cov: &ChromCoverage) -> Vec<(String, u32, u32, u32)> { + let mut out = Vec::new(); + let mut depth: i64 = 0; + let mut last_start: i64 = -1; + let mut last_depth: i64 = -1; + + for (pos, point) in cov.points.iter().enumerate() { + depth += point.starts as i64; + + if depth != last_depth { + if last_depth > 0 { + out.push(( + chrom.to_string(), + last_start as u32, + pos as u32, + last_depth as u32, + )); + } + last_depth = depth; + last_start = pos as i64; + } + + depth -= point.ends as i64; + } + + if last_depth > 0 { + out.push(( + chrom.to_string(), + last_start as u32, + cov.size as u32, + last_depth as u32, + )); + } + + out +} + +/// Clip bedGraph intervals to chromosome sizes (UCSC `bedClip` behaviour). +pub fn clip_bedgraph( + entries: Vec<(String, u32, u32, u32)>, + chrom_sizes: &[(String, u64)], +) -> Vec<(String, u32, u32, u32)> { + let sizes: std::collections::HashMap<&str, u64> = chrom_sizes + .iter() + .map(|(n, s)| (n.as_str(), *s)) + .collect(); + + entries + .into_iter() + .filter_map(|(chrom, start, end, val)| { + let size = *sizes.get(chrom.as_str())? as u32; + if start >= size { + return None; + } + let clipped_end = end.min(size); + if start >= clipped_end { + return None; + } + Some((chrom, start, clipped_end, val)) + }) + .collect() +} + +/// Flatten a track accumulator into bedGraph entries in chromosome order. +pub fn track_to_bedgraph( + track: &TrackAccum, + chrom_order: &[(String, u64)], +) -> Vec<(String, u32, u32, u32)> { + let mut entries = Vec::new(); + for (chrom, _) in chrom_order { + if let Some((_, cov)) = track.chroms.iter().find(|(n, _)| n == chrom) { + entries.extend(chrom_to_bedgraph(chrom, cov)); + } + } + entries +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_chrom_to_bedgraph_simple() { + let mut cov = ChromCoverage::new(10); + cov.add_interval(2, 4); // positions 2,3,4 covered + cov.add_interval(2, 4); // depth 2 + + let bg = chrom_to_bedgraph("chr1", &cov); + assert_eq!(bg, vec![("chr1".to_string(), 2, 5, 2)]); + } + + #[test] + fn test_clip_bedgraph() { + let entries = vec![("chr1".to_string(), 100, 200, 5)]; + let sizes = vec![("chr1".to_string(), 150u64)]; + let clipped = clip_bedgraph(entries, &sizes); + assert_eq!(clipped, vec![("chr1".to_string(), 100, 150, 5)]); + } +} diff --git a/src/rna/bigwig/mod.rs b/src/rna/bigwig/mod.rs new file mode 100644 index 0000000..d23dc29 --- /dev/null +++ b/src/rna/bigwig/mod.rs @@ -0,0 +1,10 @@ +//! Genome-wide coverage bigWig track generation. +//! +//! Reimplements the nf-core/rnaseq bigWig pipeline (`bedtools genomecov` + +//! UCSC `bedClip` + `bedGraphToBigWig`) in a single streaming pass over the BAM. + +pub mod accumulator; +pub mod output; + +pub use accumulator::{GenomeCovAccum, GenomeCovResult}; +pub use output::write_bigwig_tracks; diff --git a/src/rna/bigwig/output.rs b/src/rna/bigwig/output.rs new file mode 100644 index 0000000..7a0a70c --- /dev/null +++ b/src/rna/bigwig/output.rs @@ -0,0 +1,123 @@ +//! bigWig output generation from genome coverage accumulators. +//! +//! Converts accumulated coverage to bedGraph, clips to chromosome boundaries +//! (UCSC `bedClip`), and writes bigWig files (via the `bigtools` crate). + +use std::collections::HashMap; +use std::path::Path; + +use anyhow::{Context, Result}; +use bigtools::beddata::BedParserStreamingIterator; +use bigtools::{BigWigWrite, Value}; + +use super::accumulator::{clip_bedgraph, track_to_bedgraph, GenomeCovResult, TrackAccum}; + +/// Write all bigWig coverage tracks for a sample. +/// +/// Produces nf-core/rnaseq-compatible filenames: +/// - `{sample}.bigWig` — combined (always) +/// - `{sample}.forward.bigWig` — forward strand (stranded libraries) +/// - `{sample}.reverse.bigWig` — reverse strand (stranded libraries) +pub fn write_bigwig_tracks( + outdir: &Path, + sample_name: &str, + result: &GenomeCovResult, + chrom_sizes: &[(String, u64)], + threads: usize, +) -> Result> { + std::fs::create_dir_all(outdir) + .with_context(|| format!("Failed to create bigwig output directory: {}", outdir.display()))?; + + let mut written = Vec::new(); + + let combined_path = outdir.join(format!("{sample_name}.bigWig")); + if write_track_bigwig(&combined_path, &result.combined, chrom_sizes, threads)? { + written.push(( + "bigWig (combined)".into(), + combined_path.display().to_string(), + )); + } + + if let Some(ref forward) = result.forward { + let path = outdir.join(format!("{sample_name}.forward.bigWig")); + if write_track_bigwig(&path, forward, chrom_sizes, threads)? { + written.push(("bigWig (forward)".into(), path.display().to_string())); + } + } + + if let Some(ref reverse) = result.reverse { + let path = outdir.join(format!("{sample_name}.reverse.bigWig")); + if write_track_bigwig(&path, reverse, chrom_sizes, threads)? { + written.push(("bigWig (reverse)".into(), path.display().to_string())); + } + } + + Ok(written) +} + +/// Write a single track as a bigWig file. +/// +/// Skips writing when the track has no coverage intervals (e.g. an empty +/// per-strand track), matching upstream behaviour where `bedGraphToBigWig` +/// receives an empty clipped bedGraph. +fn write_track_bigwig( + path: &Path, + track: &TrackAccum, + chrom_sizes: &[(String, u64)], + threads: usize, +) -> Result { + let bedgraph = track_to_bedgraph(track, chrom_sizes); + let clipped = clip_bedgraph(bedgraph, chrom_sizes); + if clipped.is_empty() { + log::debug!( + "Skipping empty bigWig track (no coverage intervals): {}", + path.display() + ); + return Ok(false); + } + write_bedgraph_bigwig(path, clipped, chrom_sizes, threads)?; + Ok(true) +} + +/// Write bedGraph entries to a bigWig file using bigtools. +fn write_bedgraph_bigwig( + path: &Path, + entries: Vec<(String, u32, u32, u32)>, + chrom_sizes: &[(String, u64)], + threads: usize, +) -> Result<()> { + let chrom_map: HashMap = chrom_sizes + .iter() + .map(|(name, size)| (name.clone(), *size as u32)) + .collect(); + + let iter = entries.into_iter().map(|(chrom, start, end, val)| { + ( + chrom, + Value { + start, + end, + value: val as f32, + }, + ) + }); + let data = BedParserStreamingIterator::wrap_infallible_iter(iter, true); + + let runtime = if threads <= 1 { + tokio::runtime::Builder::new_current_thread() + .build() + .context("Failed to create tokio runtime for bigWig writing")? + } else { + tokio::runtime::Builder::new_multi_thread() + .worker_threads(threads) + .build() + .context("Failed to create tokio runtime for bigWig writing")? + }; + + let outb = BigWigWrite::create_file(path.to_string_lossy().to_string(), chrom_map) + .context("Failed to create bigWig writer")?; + outb.write(data, runtime) + .map_err(|e| anyhow::anyhow!("Failed to write bigWig to {}: {e}", path.display()))?; + + Ok(()) +} diff --git a/src/rna/dupradar/counting.rs b/src/rna/dupradar/counting.rs index 785115c..a947c9d 100644 --- a/src/rna/dupradar/counting.rs +++ b/src/rna/dupradar/counting.rs @@ -13,6 +13,7 @@ use crate::gtf::Gene; use crate::io::format_count; use crate::rna::qualimap::QualimapAccum; use crate::rna::rseqc::accumulators::{RseqcAccumulators, RseqcAnnotations, RseqcConfig}; +use crate::rna::bigwig::GenomeCovAccum; use crate::Strandedness; use anyhow::{Context, Result}; use coitrees::{COITree, Interval, IntervalTree}; @@ -299,6 +300,8 @@ pub struct CountResult { /// Qualimap RNA-Seq QC results (if enabled). #[allow(dead_code)] pub qualimap: Option, + /// Genome coverage for bigWig tracks (if enabled). + pub genomecov: Option, } /// Metadata stored with each interval in the cache-oblivious interval tree. @@ -542,6 +545,9 @@ struct ChromResult { /// Qualimap RNA-Seq QC accumulator (if enabled). qualimap: Option, + + /// Genome coverage accumulator for bigWig tracks (if enabled). + genomecov: Option, } impl ChromResult { @@ -575,6 +581,7 @@ impl ChromResult { fc_biotype_ambiguous: 0, fc_biotype_no_features: 0, qualimap: None, + genomecov: None, } } @@ -666,6 +673,14 @@ impl ChromResult { self.qualimap = Some(other_qm); } } + // Merge genome coverage accumulators + if let Some(other_gc) = other.genomecov { + if let Some(ref mut self_gc) = self.genomecov { + self_gc.merge(other_gc); + } else { + self.genomecov = Some(other_gc); + } + } } } @@ -979,6 +994,8 @@ fn process_chromosome_batch( rseqc_annotations: Option<&RseqcAnnotations>, htslib_threads: usize, qualimap_index: Option<&crate::rna::qualimap::QualimapIndex>, + genomecov_enabled: bool, + tid_to_len: &[u64], gene_to_biotype: &[u16], num_biotypes: usize, progress: Option<&ProgressBar>, @@ -987,6 +1004,18 @@ fn process_chromosome_batch( if qualimap_index.is_some() { result.qualimap = Some(crate::rna::qualimap::QualimapAccum::new(stranded)); } + if genomecov_enabled { + let batch_chroms: Vec<(String, u64)> = tids + .iter() + .map(|&tid| { + ( + tid_to_name[tid as usize].clone(), + tid_to_len[tid as usize], + ) + }) + .collect(); + result.genomecov = Some(GenomeCovAccum::new(stranded, &batch_chroms)); + } let mut rseqc_accums = rseqc_config.map(|cfg| RseqcAccumulators::new(cfg, rseqc_annotations)); // Pre-compute resolved chromosome names per TID (apply prefix/mapping once, @@ -1074,6 +1103,15 @@ fn process_chromosome_batch( } } + // --- bigWig genome coverage (bedtools genomecov equivalent) --- + // Only skips unmapped reads; uses BAM reference names. + if let Some(ref mut gc) = result.genomecov { + let tid = record.tid(); + if tid >= 0 && (tid as usize) < tid_to_name.len() { + gc.process_read(&record, &tid_to_name[tid as usize], stranded); + } + } + // Resolve chromosome for gene counting let rec_tid = record.tid(); let chrom = if rec_tid >= 0 && (rec_tid as usize) < tid_to_gtf_chrom.len() { @@ -1152,6 +1190,7 @@ pub fn count_reads( rseqc_config: Option<&RseqcConfig>, rseqc_annotations: Option<&RseqcAnnotations>, qualimap_index: Option<&crate::rna::qualimap::QualimapIndex>, + genomecov_enabled: bool, progress: Option<&ProgressBar>, ) -> Result { // Build gene ID interner for allocation-free lookups in the hot loop @@ -1300,6 +1339,8 @@ pub fn count_reads( rseqc_annotations, htslib_threads, qualimap_index, + genomecov_enabled, + &tid_to_len, &gene_to_biotype, num_biotypes, progress, @@ -1332,6 +1373,16 @@ pub fn count_reads( rseqc_config.map(|cfg| RseqcAccumulators::new(cfg, rseqc_annotations)); let mut qualimap_accum: Option = qualimap_index.map(|_| crate::rna::qualimap::QualimapAccum::new(stranded)); + let mut genomecov_accum = if genomecov_enabled { + let all_chroms: Vec<(String, u64)> = tid_to_name + .iter() + .zip(tid_to_len.iter()) + .map(|(name, len)| (name.clone(), *len)) + .collect(); + Some(GenomeCovAccum::new(stranded, &all_chroms)) + } else { + None + }; // Pre-compute resolved chromosome names for RSeQC tools // (apply chromosome prefix and mapping, same as parallel path) @@ -1417,6 +1468,14 @@ pub fn count_reads( } } + // --- bigWig genome coverage (bedtools genomecov equivalent) --- + if let Some(ref mut gc) = genomecov_accum { + let tid = record.tid(); + if tid >= 0 && (tid as usize) < tid_to_name.len() { + gc.process_read(&record, &tid_to_name[tid as usize], stranded); + } + } + // Resolve chromosome for gene counting let tid = record.tid(); let chrom = if tid >= 0 && (tid as usize) < tid_to_rseqc_chrom.len() { @@ -1442,6 +1501,7 @@ pub fn count_reads( result.unmatched_mates = mate_buffer; result.qualimap = qualimap_accum; + result.genomecov = genomecov_accum; (result, rseqc_accums) }; @@ -1715,6 +1775,7 @@ pub fn count_reads( } _ => None, }, + genomecov: merged.genomecov.map(Into::into), }) } diff --git a/src/rna/featurecounts/output.rs b/src/rna/featurecounts/output.rs index 424c1a7..c1ee2d2 100644 --- a/src/rna/featurecounts/output.rs +++ b/src/rna/featurecounts/output.rs @@ -375,6 +375,7 @@ mod tests { fc_biotype_no_features: 10, rseqc: None, qualimap: None, + genomecov: None, }; let biotypes = aggregate_biotype_counts(&count_result); diff --git a/src/rna/mod.rs b/src/rna/mod.rs index 7dbcc09..99bb2bd 100644 --- a/src/rna/mod.rs +++ b/src/rna/mod.rs @@ -4,6 +4,7 @@ //! and RSeQC tool reimplementations. pub mod bam_flags; +pub mod bigwig; pub mod cpp_rng; pub mod dupradar; pub mod featurecounts; diff --git a/tests/integration_test.rs b/tests/integration_test.rs index c7ea3ac..f46348b 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -1026,3 +1026,245 @@ fn test_dup_check_parallel_uses_global_duplicate_state() { let _ = fs::remove_dir_all(root); } + +// ============================================================================ +// bigWig coverage track tests (bedtools genomecov + UCSC bedGraphToBigWig) +// ============================================================================ + +/// Parse a 4-column bedGraph file into tuples. +fn read_bedgraph(path: &Path) -> Vec<(String, u32, u32, u32)> { + fs::read_to_string(path) + .unwrap_or_else(|e| panic!("Failed to read bedGraph {}: {e}", path.display())) + .lines() + .filter(|l| !l.is_empty()) + .map(|line| { + let parts: Vec<&str> = line.split('\t').collect(); + ( + parts[0].to_string(), + parts[1].parse().unwrap(), + parts[2].parse().unwrap(), + parts[3].parse::().unwrap() as u32, + ) + }) + .collect() +} + +fn which_bedtools() -> Option { + for candidate in ["/tmp/bedtools2/bin/bedtools", "bedtools"] { + if Command::new(candidate) + .arg("--version") + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) + .status() + .map(|s| s.success()) + .unwrap_or(false) + { + return Some(candidate.to_string()); + } + } + None +} + +fn which_ucsc_tool(name: &str) -> Option { + for candidate in [format!("/tmp/{name}"), name.to_string()] { + if Path::new(&candidate).exists() { + return Some(candidate); + } + } + None +} + +fn bigwig_to_bedgraph(bw: &Path, out: &Path) -> bool { + let bgtobg = match which_ucsc_tool("bigWigToBedGraph") { + Some(p) => p, + None => return false, + }; + Command::new(&bgtobg) + .args([bw.to_str().unwrap(), out.to_str().unwrap()]) + .status() + .map(|s| s.success()) + .unwrap_or(false) +} + +fn write_test_chrom_sizes(path: &Path) { + use rust_htslib::bam::Read; + let r = rust_htslib::bam::Reader::from_path("tests/data/test.bam").unwrap(); + let h = r.header(); + let mut sizes = String::new(); + for tid in 0..h.target_count() { + let name = String::from_utf8_lossy(h.tid2name(tid as u32)); + let len = h.target_len(tid as u32).unwrap_or(0); + sizes.push_str(&format!("{name}\t{len}\n")); + } + fs::write(path, sizes).unwrap(); +} + +fn sort_bedgraph_file(path: &Path) { + let sorted = fs::read_to_string(path).unwrap(); + let mut lines: Vec = sorted.lines().map(String::from).collect(); + lines.sort_by(|a, b| { + let pa: Vec<&str> = a.split('\t').collect(); + let pb: Vec<&str> = b.split('\t').collect(); + pa[0] + .cmp(pb[0]) + .then_with(|| pa[1].parse::().unwrap().cmp(&pb[1].parse::().unwrap())) + }); + fs::write(path, lines.join("\n") + "\n").unwrap(); +} + +#[test] +fn test_bigwig_combined_matches_bedtools_genomecov() { + if which_bedtools().is_none() || which_ucsc_tool("bigWigToBedGraph").is_none() { + eprintln!("Skipping bigWig cross-check: upstream tools not found"); + return; + } + + let root = unique_test_dir("bigwig"); + fs::create_dir_all(&root).unwrap(); + let chrom_sizes_path = root.join("chrom.sizes"); + write_test_chrom_sizes(&chrom_sizes_path); + + let bedtools = which_bedtools().unwrap(); + let bg = root.join("combined.bedGraph"); + let clip = root.join("combined.clip.bedGraph"); + let ref_bw = root.join("ref.bigWig"); + + let status = Command::new(&bedtools) + .args([ + "genomecov", + "-ibam", + "tests/data/test.bam", + "-split", + "-bg", + ]) + .stdout(std::process::Stdio::from( + std::fs::File::create(&bg).expect("create bedGraph"), + )) + .status() + .expect("bedtools"); + assert!(status.success()); + sort_bedgraph_file(&bg); + + Command::new(which_ucsc_tool("bedClip").unwrap()) + .args([ + bg.to_str().unwrap(), + chrom_sizes_path.to_str().unwrap(), + clip.to_str().unwrap(), + ]) + .status() + .unwrap(); + Command::new(which_ucsc_tool("bedGraphToBigWig").unwrap()) + .args([ + clip.to_str().unwrap(), + chrom_sizes_path.to_str().unwrap(), + ref_bw.to_str().unwrap(), + ]) + .status() + .unwrap(); + + let outdir = root.join("rustqc"); + let output = run_rustqc_impl(outdir.to_str().unwrap(), "tests/data/test.bam", true); + assert!( + output.status.success(), + "rustqc failed:\n{}", + String::from_utf8_lossy(&output.stderr) + ); + + let rust_bw = outdir.join("bigwig/test.bigWig"); + assert!(rust_bw.exists(), "RustQC bigWig missing"); + + let ref_bg = root.join("ref.bg"); + let rust_bg = root.join("rust.bg"); + assert!(bigwig_to_bedgraph(&ref_bw, &ref_bg)); + assert!(bigwig_to_bedgraph(&rust_bw, &rust_bg)); + assert_eq!(read_bedgraph(&ref_bg), read_bedgraph(&rust_bg)); + + let _ = fs::remove_dir_all(root); +} + +#[test] +fn test_bigwig_stranded_forward_matches_bedtools() { + if which_bedtools().is_none() || which_ucsc_tool("bigWigToBedGraph").is_none() { + eprintln!("Skipping stranded bigWig cross-check: upstream tools not found"); + return; + } + + let root = unique_test_dir("bigwig-stranded"); + fs::create_dir_all(&root).unwrap(); + let chrom_sizes_path = root.join("chrom.sizes"); + write_test_chrom_sizes(&chrom_sizes_path); + + let bedtools = which_bedtools().unwrap(); + let bg = root.join("forward.bedGraph"); + let clip = root.join("forward.clip.bedGraph"); + let ref_bw = root.join("ref.forward.bigWig"); + + let status = Command::new(&bedtools) + .args([ + "genomecov", + "-ibam", + "tests/data/test.bam", + "-split", + "-du", + "-strand", + "+", + "-bg", + ]) + .stdout(std::process::Stdio::from( + std::fs::File::create(&bg).expect("create bedGraph"), + )) + .status() + .expect("bedtools"); + assert!(status.success()); + sort_bedgraph_file(&bg); + + Command::new(which_ucsc_tool("bedClip").unwrap()) + .args([ + bg.to_str().unwrap(), + chrom_sizes_path.to_str().unwrap(), + clip.to_str().unwrap(), + ]) + .status() + .unwrap(); + Command::new(which_ucsc_tool("bedGraphToBigWig").unwrap()) + .args([ + clip.to_str().unwrap(), + chrom_sizes_path.to_str().unwrap(), + ref_bw.to_str().unwrap(), + ]) + .status() + .unwrap(); + + let outdir = root.join("rustqc"); + let binary = rustqc_binary(); + let output = Command::new(&binary) + .args([ + "rna", + "tests/data/test.bam", + "--gtf", + "tests/data/test.gtf", + "--outdir", + outdir.to_str().unwrap(), + "--stranded", + "forward", + "--skip-dup-check", + ]) + .output() + .expect("rustqc"); + assert!( + output.status.success(), + "rustqc failed:\n{}", + String::from_utf8_lossy(&output.stderr) + ); + + let rust_bw = outdir.join("bigwig/test.forward.bigWig"); + assert!(rust_bw.exists()); + + let ref_bg = root.join("ref.bg"); + let rust_bg = root.join("rust.bg"); + assert!(bigwig_to_bedgraph(&ref_bw, &ref_bg)); + assert!(bigwig_to_bedgraph(&rust_bw, &rust_bg)); + assert_eq!(read_bedgraph(&ref_bg), read_bedgraph(&rust_bg)); + + let _ = fs::remove_dir_all(root); +} From caaee374805117587d1fa9e63f6ca416c4cd36dd Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 16 Jun 2026 11:32:35 +0000 Subject: [PATCH 2/8] docs: document bigWig coverage tracks across site and markdown Add a dedicated bigWig docs page and update README, AGENTS.md, CONTRIBUTING.md, getting-started guides, credits, CLI/config references, benchmark details, and library doc comments to describe nf-core/rnaseq- compatible genome coverage track output. Co-authored-by: Phil Ewels --- AGENTS.md | 17 +- CHANGELOG.md | 4 + CONTRIBUTING.md | 6 +- README.md | 7 +- docs/astro.config.mjs | 1 + docs/src/content/docs/about/credits.mdx | 18 +++ .../docs/getting-started/introduction.mdx | 2 + .../docs/getting-started/quickstart.md | 5 +- docs/src/content/docs/index.mdx | 8 +- .../content/docs/rna/benchmark-details.mdx | 4 +- docs/src/content/docs/rna/bigwig.mdx | 149 ++++++++++++++++++ docs/src/content/docs/usage/cli-reference.mdx | 11 +- docs/src/content/docs/usage/configuration.md | 32 +++- docs/src/content/docs/usage/library.mdx | 2 +- src/cli.rs | 7 +- src/lib.rs | 4 +- 16 files changed, 253 insertions(+), 24 deletions(-) create mode 100644 docs/src/content/docs/rna/bigwig.mdx diff --git a/AGENTS.md b/AGENTS.md index 108414a..2c117e8 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -13,6 +13,8 @@ > [preseq](https://github.com/smithlabcode/preseq) `lc_extrap`) > - **samtools-compatible** flagstat, idxstats, and full stats output > - **Gene body coverage** profiling with Qualimap-compatible output +> - **bigWig** genome coverage tracks (nf-core/rnaseq-compatible; bedtools +> `genomecov` + UCSC `bedClip` semantics) > > Binary crate (`rustqc`), Rust edition 2021. @@ -63,7 +65,7 @@ src/ io.rs — Shared I/O utilities (gzip-transparent file reading) gtf.rs — GTF annotation file parser (with configurable attribute extraction) rna/ - mod.rs — Re-exports all submodules (dupradar, featurecounts, rseqc, bam_flags, cpp_rng, preseq, qualimap) + mod.rs — Re-exports all submodules (dupradar, featurecounts, rseqc, bam_flags, cpp_rng, preseq, qualimap, bigwig) bam_flags.rs — BAM flag constants cpp_rng.rs — C++ RNG FFI shim for preseq bootstrap reproducibility dupradar/ @@ -76,6 +78,10 @@ src/ mod.rs — Re-exports output output.rs — featureCounts-format output & biotype counting preseq.rs — preseq lc_extrap library complexity extrapolation + bigwig/ + mod.rs — Re-exports accumulator and output + accumulator.rs — bedtools genomecov difference-array coverage accumulation + output.rs — bedClip semantics + bigWig writing via bigtools qualimap/ mod.rs — Re-exports all Qualimap modules accumulator.rs — Gene body coverage accumulation logic @@ -120,8 +126,9 @@ dupRadar duplicate rate analysis, featureCounts-compatible gene counting, all 8 RSeQC-equivalent tools (bam_stat, infer_experiment, read_duplication, read_distribution, junction_annotation, junction_saturation, inner_distance, TIN), preseq library complexity extrapolation, samtools-compatible outputs -(flagstat, idxstats, stats), and gene body coverage profiling with -Qualimap-compatible output. The GTF parser extracts transcript-level +(flagstat, idxstats, stats), gene body coverage profiling with +Qualimap-compatible output, and nf-core/rnaseq-compatible bigWig coverage +tracks. The GTF parser extracts transcript-level structure (exons + CDS features) to derive all data needed by every tool. Individual tools can be disabled via the YAML config file (each has an `enabled` @@ -255,6 +262,8 @@ To prepare a release: | `rayon` | Data parallelism | | `rand` / `rand_chacha` | Reproducible random sampling | | `flate2` | Gzip decompression (annotation files) | +| `bigtools` | bigWig file writing | +| `tokio` | Async runtime for bigtools | ## Duplicate Marking Validation @@ -294,7 +303,7 @@ forwarded to `count_reads()` as the `skip_dup_check: bool` parameter). `infer_experiment:`, `read_duplication:`, `read_distribution:`, `junction_annotation:`, `junction_saturation:`, `inner_distance:`, `tin:`). Each has an `enabled: bool` toggle and tool-specific parameter overrides. CLI flags take precedence over config values. -- Under `rna:`, there are also sections for `preseq:`, `qualimap:`, +- Under `rna:`, there are also sections for `preseq:`, `qualimap:`, `bigwig:`, `flagstat:`, `idxstats:`, and `samtools_stats:`. Each has an `enabled: bool` toggle. Preseq has additional parameters: `max_extrap`, `step_size`, `n_bootstraps`, `confidence_level`, `seed`, `max_terms`, `defects`. TIN has `sample_size` and diff --git a/CHANGELOG.md b/CHANGELOG.md index b88ab72..6712497 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ - Add bigWig genome coverage tracks in the existing BAM streaming pass, matching nf-core/rnaseq `bedtools genomecov` + UCSC `bedClip` + `bedGraphToBigWig` output ([#112](https://github.com/seqeralabs/RustQC/issues/112)) +### Documentation + +- Document bigWig coverage tracks on the docs site, README, AGENTS.md, and related markdown files + ## [Version 0.2.1](https://github.com/seqeralabs/RustQC/releases/tag/v0.2.1) - 2026-04-09 ### Bug fixes diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 181de96..65b30ad 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -86,7 +86,7 @@ src/ gtf.rs GTF annotation file parser rna/ mod.rs Re-exports all submodules (bam_flags, cpp_rng, dupradar, - featurecounts, preseq, qualimap, rseqc) + featurecounts, preseq, qualimap, bigwig, rseqc) bam_flags.rs BAM flag constants cpp_rng.rs C++ RNG FFI shim for preseq bootstrap reproducibility dupradar/ @@ -99,6 +99,10 @@ src/ mod.rs Re-exports output output.rs featureCounts-format output and biotype counting preseq.rs preseq lc_extrap library complexity extrapolation + bigwig/ + mod.rs Re-exports accumulator and output + accumulator.rs bedtools genomecov coverage accumulation + output.rs bedClip semantics + bigWig writing qualimap/ mod.rs Re-exports accumulator, coverage, index, output, plots, report accumulator.rs Gene body coverage accumulator diff --git a/README.md b/README.md index f576fc7..65dc431 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@

- Benchmark: RustQC ~14m 54s vs traditional tools ~15h 34m sequential (dupRadar + featureCounts + 8 RSeQC tools incl. TIN + preseq + samtools + Qualimap) + Benchmark: RustQC ~14m 54s vs traditional tools ~15h 34m sequential (dupRadar + featureCounts + 8 RSeQC tools incl. TIN + preseq + samtools + Qualimap + bigWig)

@@ -52,11 +52,12 @@ It currently includes: | TIN | [RSeQC](https://rseqc.sourceforge.net/#tin-py) `tin.py` | Transcript Integrity Number | | preseq | [preseq](http://smithlabresearch.org/software/preseq/) `lc_extrap` | Library complexity extrapolation | | Qualimap rnaseq | [Qualimap](http://qualimap.conesalab.org/) `rnaseq` | Gene body coverage, read origin, strand specificity | +| bigWig | [nf-core/rnaseq](https://nf-co.re/rnaseq) `bedtools genomecov` + UCSC utilities | Genome-wide coverage tracks (combined and per-strand) | | flagstat | [samtools](http://www.htslib.org/) `flagstat` | Alignment flag summary | | idxstats | [samtools](http://www.htslib.org/) `idxstats` | Per-chromosome read counts | | stats | [samtools](http://www.htslib.org/) `stats` | Full samtools stats output including all histogram sections | -All outputs are format- and numerically identical to the upstream tools, and compatible with [MultiQC](https://multiqc.info/) for reporting. +Most outputs are format- and numerically identical to the upstream tools, and compatible with [MultiQC](https://multiqc.info/) for reporting. bigWig coverage intervals match [bedtools](https://github.com/arq5x/bedtools2) `genomecov` exactly; binary `.bigWig` files use a different encoder but decode to the same coverage (see [bigWig docs](https://seqeralabs.github.io/RustQC/rna/bigwig/)). ## Quick start @@ -84,7 +85,7 @@ See the [documentation](https://seqeralabs.github.io/RustQC/) for full usage det ## Use as a Rust library -The crate is also published as a library, so the QC analysis modules (GTF parsing, dupRadar, featureCounts, RSeQC, Qualimap, preseq, samtools-style outputs) can be embedded into other Rust programs: +The crate is also published as a library, so the QC analysis modules (GTF parsing, dupRadar, featureCounts, RSeQC, Qualimap, bigWig, preseq, samtools-style outputs) can be embedded into other Rust programs: ```toml [dependencies] diff --git a/docs/astro.config.mjs b/docs/astro.config.mjs index 4e1c2f4..01a7fca 100644 --- a/docs/astro.config.mjs +++ b/docs/astro.config.mjs @@ -68,6 +68,7 @@ export default defineConfig({ { label: "featureCounts", slug: "rna/featurecounts" }, { label: "RSeQC", slug: "rna/rseqc" }, { label: "Qualimap", slug: "rna/qualimap" }, + { label: "bigWig", slug: "rna/bigwig" }, { label: "Preseq", slug: "rna/preseq" }, { label: "Samtools", slug: "rna/samtools" }, ], diff --git a/docs/src/content/docs/about/credits.mdx b/docs/src/content/docs/about/credits.mdx index 171086e..75bb8a8 100644 --- a/docs/src/content/docs/about/credits.mdx +++ b/docs/src/content/docs/about/credits.mdx @@ -98,6 +98,22 @@ Qualimap rnaseq report. - Website: [qualimap.conesalab.org](http://qualimap.conesalab.org/) - Publication: [doi.org/10.1093/bioinformatics/bts503](https://doi.org/10.1093/bioinformatics/bts503) +## bigWig coverage tracks + +RustQC produces nf-core/rnaseq-compatible genome-wide bigWig coverage tracks, +matching the parameters used by [bedtools](https://github.com/arq5x/bedtools2) +`genomecov` and UCSC `bedClip` / `bedGraphToBigWig`. See the +[bigWig](../rna/bigwig/) page for output files, strandedness behaviour, and +validation details. + +> Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing +> genomic features. *Bioinformatics*. 2010;26(6):841-842. + +- bedtools: [github.com/arq5x/bedtools2](https://github.com/arq5x/bedtools2) +- Publication: [doi.org/10.1093/bioinformatics/btq033](https://doi.org/10.1093/bioinformatics/btq033) +- bigWig format (UCSC): Kent WJ, et al. The human genome browser at UCSC. + *Genome Research*. 2002;12(6):996-1006. + ## Key dependencies RustQC is built with the following open-source Rust libraries: @@ -114,6 +130,8 @@ RustQC is built with the following open-source Rust libraries: | [flate2](https://github.com/rust-lang/flate2-rs) | Gzip decompression for annotation files | | [serde](https://github.com/serde-rs/serde) | YAML configuration deserialization | | [indexmap](https://github.com/indexmap-rs/indexmap) | Insertion-order-preserving maps | +| [bigtools](https://github.com/jackh726/bigtools) | bigWig and BigBed file I/O | +| [tokio](https://github.com/tokio-rs/tokio) | Async runtime (bigWig writing) | | [env_logger](https://github.com/rust-cli/env_logger) | Logging output | ## RustQC diff --git a/docs/src/content/docs/getting-started/introduction.mdx b/docs/src/content/docs/getting-started/introduction.mdx index e65f65c..63deb95 100644 --- a/docs/src/content/docs/getting-started/introduction.mdx +++ b/docs/src/content/docs/getting-started/introduction.mdx @@ -13,6 +13,7 @@ RustQC is a fast quality control toolkit for sequencing data, written in Rust. I - [preseq](https://github.com/smithlabcode/preseq): library complexity extrapolation (lc_extrap) - [samtools](http://www.htslib.org/): flagstat, idxstats, and full stats output including all histogram sections - [Qualimap](http://qualimap.conesalab.org/): gene body coverage profiling and RNA-seq QC summary +- [nf-core/rnaseq](https://nf-co.re/rnaseq) bigWig: genome-wide coverage tracks (`bedtools genomecov` + UCSC utilities) ## Why RustQC? @@ -41,6 +42,7 @@ Given a duplicate-marked BAM file and a GTF annotation, `rustqc rna` runs all of | dupRadar | [dupRadar](https://bioconductor.org/packages/dupRadar/) | RNA-seq PCR duplicate rate analysis | | featureCounts | [featureCounts](http://subread.sourceforge.net/) | Gene-level read counting and biotypes | | Qualimap rnaseq | [Qualimap](http://qualimap.conesalab.org/) rnaseq | Gene body coverage and RNA-seq QC | +| bigWig | [nf-core/rnaseq](https://nf-co.re/rnaseq) `bedtools genomecov` | Genome-wide coverage tracks | | preseq | [preseq](http://smithlabresearch.org/software/preseq/) `lc_extrap` | Library complexity extrapolation | | flagstat | `samtools flagstat` | Alignment flag statistics | | idxstats | `samtools idxstats` | Per-chromosome read counts | diff --git a/docs/src/content/docs/getting-started/quickstart.md b/docs/src/content/docs/getting-started/quickstart.md index 1f3dd84..99afe7a 100644 --- a/docs/src/content/docs/getting-started/quickstart.md +++ b/docs/src/content/docs/getting-started/quickstart.md @@ -7,7 +7,7 @@ A basic RustQC analysis from install to results. ## RNA-seq duplicate analysis -Run all RNA-seq QC analyses (dupRadar, featureCounts, RSeQC tools including TIN, Qualimap, preseq, and samtools) in a single pass: +Run all RNA-seq QC analyses (dupRadar, featureCounts, RSeQC tools including TIN, Qualimap, bigWig coverage tracks, preseq, and samtools) in a single pass: ```bash rustqc rna sample.markdup.bam --gtf genes.gtf -p -o results/ @@ -27,7 +27,8 @@ This command: ### Output -Output files are organized into subdirectories by tool group. +Output files are organized into subdirectories by tool group +(for example `dupradar/`, `rseqc/`, `qualimap/`, `bigwig/`, `preseq/`, and `samtools/`). Files are generally named the same as their upstream tool equivalents. This means that MultiQC should find them and report them as if they were created by the original tool. diff --git a/docs/src/content/docs/index.mdx b/docs/src/content/docs/index.mdx index 54afb33..24c2170 100644 --- a/docs/src/content/docs/index.mdx +++ b/docs/src/content/docs/index.mdx @@ -27,6 +27,7 @@ export const base = import.meta.env.BASE_URL; featureCounts,{" "} RSeQC,{" "} Qualimap,{" "} + nf-core/rnaseq bigWig,{" "} preseq, and{" "} samtools in Rust, delivering matching results much faster. @@ -163,7 +164,8 @@ export const base = import.meta.env.BASE_URL; Produces identical or near-identical output to R's dupRadar and featureCounts, plus faithful reimplementations of 8 RSeQC tools (including TIN), preseq, - and samtools. See [benchmark details](./rna/benchmark-details/). + samtools, and nf-core/rnaseq-compatible bigWig coverage tracks. + See [benchmark details](./rna/benchmark-details/). Processes ~186M reads in 14m 54s on AWS vs ~15h 34m of sequential tool @@ -171,7 +173,7 @@ export const base = import.meta.env.BASE_URL; workflow. - Replaces 14+ separate QC tool invocations with a single CLI command. + Replaces 15+ separate QC tool invocations with a single CLI command. One task to stage data for. One pass through the BAM file. Minimal staging and I/O overhead. @@ -183,5 +185,5 @@ export const base = import.meta.env.BASE_URL; ## Credits -RustQC reimplements established tools including dupRadar, featureCounts, RSeQC, preseq, samtools, and Qualimap. +RustQC reimplements established tools including dupRadar, featureCounts, RSeQC, preseq, samtools, Qualimap, and nf-core/rnaseq bigWig coverage tracks. See the [Credits & Citations](./about/credits/) page for the full list and how to cite them. diff --git a/docs/src/content/docs/rna/benchmark-details.mdx b/docs/src/content/docs/rna/benchmark-details.mdx index 8594e00..a3ec3e0 100644 --- a/docs/src/content/docs/rna/benchmark-details.mdx +++ b/docs/src/content/docs/rna/benchmark-details.mdx @@ -45,7 +45,8 @@ across chromosomes. Per-tool comparison tables and known differences are on the individual tool pages: [dupRadar](../dupradar/), [featureCounts](../featurecounts/), [RSeQC](../rseqc/), -[preseq](../preseq/), [Samtools](../samtools/), [Qualimap](../qualimap/). +[preseq](../preseq/), [Samtools](../samtools/), [Qualimap](../qualimap/), +[bigWig](../bigwig/). ## Upstream tool versions @@ -59,6 +60,7 @@ RustQC output was validated against these specific versions: | preseq | 3.2.0 | | samtools | 1.22.1 | | Qualimap | 2.3 | +| bedtools | 2.31.1 | Exact container tags and commands are in the [RustQC-benchmarks](https://github.com/seqeralabs/RustQC-benchmarks) Nextflow pipeline. diff --git a/docs/src/content/docs/rna/bigwig.mdx b/docs/src/content/docs/rna/bigwig.mdx new file mode 100644 index 0000000..ed29b50 --- /dev/null +++ b/docs/src/content/docs/rna/bigwig.mdx @@ -0,0 +1,149 @@ +--- +title: bigWig Coverage Tracks +description: Genome-wide bigWig coverage tracks compatible with nf-core/rnaseq, generated in the RustQC streaming pass. +--- + +import { Aside, FileTree } from "@astrojs/starlight/components"; + + + +RustQC generates **bigWig** genome coverage tracks during the `rustqc rna` +single-pass BAM scan. The implementation replicates the parameters used by +[nf-core/rnaseq](https://nf-co.re/rnaseq) for its `bigwig/` output directory. + +## What it replaces + +In nf-core/rnaseq, each sample typically runs three sequential steps per strand: + +1. `bedtools genomecov` on the final genome BAM → sorted bedGraph +2. `bedClip` with chromosome sizes → clipped bedGraph +3. `bedGraphToBigWig` → `.bigWig` file + +RustQC collapses this into the existing BAM read loop. Coverage is accumulated +with the same `bedtools genomecov` logic (`-split`, optional `-du` and +`-strand`), clipped to chromosome boundaries (UCSC `bedClip` semantics), and +written as bigWig via the [`bigtools`](https://github.com/jackh726/bigtools) +crate. + + + +## Output files + +All bigWig files are written to a `bigwig/` subdirectory under the output +directory. Use `--flat-output` to write files directly to the top-level output +directory instead. + + +- bigwig/ + - sample.bigWig Combined strand-agnostic coverage (all libraries) + - sample.forward.bigWig Forward-genome-strand coverage (stranded libraries only) + - sample.reverse.bigWig Reverse-genome-strand coverage (stranded libraries only) + + +File naming matches nf-core/rnaseq: `{sample_id}.bigWig`, +`{sample_id}.forward.bigWig`, and `{sample_id}.reverse.bigWig`. + +### Combined track (all libraries) + +**File:** `.bigWig` + +Equivalent to nf-core `BEDTOOLS_GENOMECOV_COMBINED`: + +```bash +bedtools genomecov -ibam sample.bam -split -bg +``` + +### Per-strand tracks (stranded libraries only) + +When `--stranded forward` or `--stranded reverse` is set (or configured in +YAML), RustQC also writes forward- and reverse-genome-strand tracks using +`-split -du -strand +/-` with strand labels swapped for reverse libraries, matching +nf-core/rnaseq `bigwig.config`. + +| Library type | Forward track file | bedtools strand filter | +| ------------ | ------------------ | ---------------------- | +| `forward` | `.forward.bigWig` | `-strand +` | +| `forward` | `.reverse.bigWig` | `-strand -` | +| `reverse` | `.forward.bigWig` | `-strand -` | +| `reverse` | `.reverse.bigWig` | `-strand +` | + +For **unstranded** libraries, only the combined `{sample}.bigWig` is produced. +Per-strand tracks are omitted because strand-specific filtering is not +meaningful without a known library protocol. + + + +## Parameters + +RustQC matches nf-core/rnaseq defaults: + +| Parameter | nf-core/rnaseq | RustQC | +| --------- | -------------- | ------ | +| Normalization | None (`scale=1`) | None | +| Splicing | `-split` | `-split` (CIGAR N/D block splitting) | +| PE deduplication | `-du` on per-strand tracks only | `-du` on per-strand tracks only | +| MAPQ filter | None (all mapped reads) | None (all mapped reads) | +| Clipping | `bedClip` with chrom sizes | Equivalent clipping before write | + +Input must be a **coordinate-sorted** BAM (or SAM/CRAM). No GTF annotation is +used for coverage calculation itself, but `--gtf` is still required by the +`rustqc rna` command. + +## Validation + +Coverage intervals match [bedtools](https://github.com/arq5x/bedtools2) +`genomecov` v2.31.1 with UCSC `bedClip` semantics. Integration tests decode +both RustQC and reference bigWig files with UCSC `bigWigToBedGraph` and compare +all bedGraph intervals exactly. + +The **binary `.bigWig` files are not bit-identical** to UCSC `bedGraphToBigWig` +output because RustQC encodes them with `bigtools` rather than the UCSC +utility. Decoded coverage values and interval boundaries are identical; only +the on-disk compression and index structure differ. + +## Configuration + +bigWig generation is enabled by default. Disable with: + +```yaml +rna: + bigwig: + enabled: false +``` + +See the [Configuration](../usage/configuration/#bigwig) page for details. + +## nf-core/rnaseq integration + +To use RustQC bigWig output in place of the nf-core/rnaseq bigWig subworkflow, +run `rustqc rna` on the final genome BAM and publish the `bigwig/` directory +to `{outdir}/{aligner}/bigwig/` (the path nf-core uses for MultiQC and +downstream consumers). + +Set `--stranded` to match the library protocol so per-strand tracks are +generated with the correct nf-core naming and strand filters. + +## References + +- **bedtools:** Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities + for comparing genomic features. *Bioinformatics*. 2010;26(6):841-842. + [bedtools on GitHub](https://github.com/arq5x/bedtools2) +- **bigWig format:** Kent WJ, et al. The human genome browser at UCSC. + *Genome Research*. 2002;12(6):996-1006. +- **bigtools:** Ramírez JM, et al. Bigtools: a high-performance BigWig and + BigBed library in Rust. *Bioinformatics*. 2024. + [bigtools on GitHub](https://github.com/jackh726/bigtools) diff --git a/docs/src/content/docs/usage/cli-reference.mdx b/docs/src/content/docs/usage/cli-reference.mdx index deca6b4..8e4cf2c 100644 --- a/docs/src/content/docs/usage/cli-reference.mdx +++ b/docs/src/content/docs/usage/cli-reference.mdx @@ -16,7 +16,7 @@ RNA-seq quality control: duplicate rate analysis (dupRadar equivalent), featureCounts-compatible read counting with biotype summaries, 8 RSeQC-equivalent tools (bam_stat, infer_experiment, read_duplication, read_distribution, junction_annotation, junction_saturation, inner_distance, -TIN), Qualimap RNA-seq QC, preseq library complexity, and samtools-compatible +TIN), Qualimap RNA-seq QC, bigWig genome coverage tracks, preseq library complexity, and samtools-compatible outputs. ### Synopsis @@ -54,7 +54,7 @@ Path to a GTF gene annotation file (plain or gzip-compressed). **Required.** The GTF must contain `exon` features with a `gene_id` attribute. Transcript-level structure (exon blocks, CDS features) is extracted automatically and used by all analyses: dupRadar, featureCounts, all 8 RSeQC tools (including TIN), -Qualimap, preseq, and samtools. +Qualimap, bigWig coverage tracks, preseq, and samtools. Gzip compression is detected automatically by inspecting the file header (magic bytes), so the `.gz` extension is not required. @@ -79,6 +79,11 @@ Library strandedness for strand-aware read counting: **Default:** `unstranded` +For **bigWig coverage tracks**, stranded libraries also produce +`{sample}.forward.bigWig` and `{sample}.reverse.bigWig` with nf-core/rnaseq- +compatible strand filters. Unstranded libraries receive only `{sample}.bigWig`. +See [bigWig](../rna/bigwig/) for details. +