diff --git a/AGENTS.md b/AGENTS.md
index 108414a..2c117e8 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -13,6 +13,8 @@
> [preseq](https://github.com/smithlabcode/preseq) `lc_extrap`)
> - **samtools-compatible** flagstat, idxstats, and full stats output
> - **Gene body coverage** profiling with Qualimap-compatible output
+> - **bigWig** genome coverage tracks (nf-core/rnaseq-compatible; bedtools
+> `genomecov` + UCSC `bedClip` semantics)
>
> Binary crate (`rustqc`), Rust edition 2021.
@@ -63,7 +65,7 @@ src/
io.rs — Shared I/O utilities (gzip-transparent file reading)
gtf.rs — GTF annotation file parser (with configurable attribute extraction)
rna/
- mod.rs — Re-exports all submodules (dupradar, featurecounts, rseqc, bam_flags, cpp_rng, preseq, qualimap)
+ mod.rs — Re-exports all submodules (dupradar, featurecounts, rseqc, bam_flags, cpp_rng, preseq, qualimap, bigwig)
bam_flags.rs — BAM flag constants
cpp_rng.rs — C++ RNG FFI shim for preseq bootstrap reproducibility
dupradar/
@@ -76,6 +78,10 @@ src/
mod.rs — Re-exports output
output.rs — featureCounts-format output & biotype counting
preseq.rs — preseq lc_extrap library complexity extrapolation
+ bigwig/
+ mod.rs — Re-exports accumulator and output
+ accumulator.rs — bedtools genomecov difference-array coverage accumulation
+ output.rs — bedClip semantics + bigWig writing via bigtools
qualimap/
mod.rs — Re-exports all Qualimap modules
accumulator.rs — Gene body coverage accumulation logic
@@ -120,8 +126,9 @@ dupRadar duplicate rate analysis, featureCounts-compatible gene counting,
all 8 RSeQC-equivalent tools (bam_stat, infer_experiment, read_duplication,
read_distribution, junction_annotation, junction_saturation, inner_distance,
TIN), preseq library complexity extrapolation, samtools-compatible outputs
-(flagstat, idxstats, stats), and gene body coverage profiling with
-Qualimap-compatible output. The GTF parser extracts transcript-level
+(flagstat, idxstats, stats), gene body coverage profiling with
+Qualimap-compatible output, and nf-core/rnaseq-compatible bigWig coverage
+tracks. The GTF parser extracts transcript-level
structure (exons + CDS features) to derive all data needed by every tool.
Individual tools can be disabled via the YAML config file (each has an `enabled`
@@ -255,6 +262,8 @@ To prepare a release:
| `rayon` | Data parallelism |
| `rand` / `rand_chacha` | Reproducible random sampling |
| `flate2` | Gzip decompression (annotation files) |
+| `bigtools` | bigWig file writing |
+| `tokio` | Async runtime for bigtools |
## Duplicate Marking Validation
@@ -294,7 +303,7 @@ forwarded to `count_reads()` as the `skip_dup_check: bool` parameter).
`infer_experiment:`, `read_duplication:`, `read_distribution:`, `junction_annotation:`,
`junction_saturation:`, `inner_distance:`, `tin:`). Each has an `enabled: bool` toggle
and tool-specific parameter overrides. CLI flags take precedence over config values.
-- Under `rna:`, there are also sections for `preseq:`, `qualimap:`,
+- Under `rna:`, there are also sections for `preseq:`, `qualimap:`, `bigwig:`,
`flagstat:`, `idxstats:`, and `samtools_stats:`. Each has an `enabled: bool` toggle.
Preseq has additional parameters: `max_extrap`, `step_size`, `n_bootstraps`,
`confidence_level`, `seed`, `max_terms`, `defects`. TIN has `sample_size` and
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2b67cce..6712497 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,15 @@
# RustQC Changelog
+## Unreleased
+
+### Features
+
+- Add bigWig genome coverage tracks in the existing BAM streaming pass, matching nf-core/rnaseq `bedtools genomecov` + UCSC `bedClip` + `bedGraphToBigWig` output ([#112](https://github.com/seqeralabs/RustQC/issues/112))
+
+### Documentation
+
+- Document bigWig coverage tracks on the docs site, README, AGENTS.md, and related markdown files
+
## [Version 0.2.1](https://github.com/seqeralabs/RustQC/releases/tag/v0.2.1) - 2026-04-09
### Bug fixes
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 181de96..65b30ad 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -86,7 +86,7 @@ src/
gtf.rs GTF annotation file parser
rna/
mod.rs Re-exports all submodules (bam_flags, cpp_rng, dupradar,
- featurecounts, preseq, qualimap, rseqc)
+ featurecounts, preseq, qualimap, bigwig, rseqc)
bam_flags.rs BAM flag constants
cpp_rng.rs C++ RNG FFI shim for preseq bootstrap reproducibility
dupradar/
@@ -99,6 +99,10 @@ src/
mod.rs Re-exports output
output.rs featureCounts-format output and biotype counting
preseq.rs preseq lc_extrap library complexity extrapolation
+ bigwig/
+ mod.rs Re-exports accumulator and output
+ accumulator.rs bedtools genomecov coverage accumulation
+ output.rs bedClip semantics + bigWig writing
qualimap/
mod.rs Re-exports accumulator, coverage, index, output, plots, report
accumulator.rs Gene body coverage accumulator
diff --git a/Cargo.lock b/Cargo.lock
index da20215..a3844c5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -88,6 +88,37 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
+[[package]]
+name = "bigtools"
+version = "0.5.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af1b9bbf6596d602e472a23ed5aa5d611fb04f14e7772226fb61c720e806202e"
+dependencies = [
+ "bincode",
+ "byteorder",
+ "byteordered",
+ "crossbeam-channel",
+ "crossbeam-utils",
+ "futures",
+ "index_list",
+ "itertools 0.10.5",
+ "libdeflater",
+ "serde",
+ "smallvec",
+ "tempfile",
+ "thiserror 1.0.69",
+ "tokio",
+]
+
+[[package]]
+name = "bincode"
+version = "1.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad"
+dependencies = [
+ "serde",
+]
+
[[package]]
name = "bindgen"
version = "0.69.5"
@@ -97,7 +128,7 @@ dependencies = [
"bitflags 2.11.0",
"cexpr",
"clang-sys",
- "itertools",
+ "itertools 0.12.1",
"lazy_static",
"lazycell",
"proc-macro2",
@@ -151,6 +182,15 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
+[[package]]
+name = "byteordered"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbf2cd9424f5ff404aba1959c835cbc448ee8b689b870a9981c76c0fd46280e6"
+dependencies = [
+ "byteorder",
+]
+
[[package]]
name = "bzip2-sys"
version = "0.1.13+1.0.8"
@@ -373,6 +413,15 @@ dependencies = [
"cfg-if",
]
+[[package]]
+name = "crossbeam-channel"
+version = "0.5.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2"
+dependencies = [
+ "crossbeam-utils",
+]
+
[[package]]
name = "crossbeam-deque"
version = "0.8.6"
@@ -535,6 +584,22 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
+[[package]]
+name = "errno"
+version = "0.3.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
+dependencies = [
+ "libc",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "fastrand"
+version = "2.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6"
+
[[package]]
name = "fdeflate"
version = "0.3.7"
@@ -653,6 +718,94 @@ dependencies = [
"quick-error",
]
+[[package]]
+name = "futures"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d"
+dependencies = [
+ "futures-channel",
+ "futures-core",
+ "futures-executor",
+ "futures-io",
+ "futures-sink",
+ "futures-task",
+ "futures-util",
+]
+
+[[package]]
+name = "futures-channel"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d"
+dependencies = [
+ "futures-core",
+ "futures-sink",
+]
+
+[[package]]
+name = "futures-core"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d"
+
+[[package]]
+name = "futures-executor"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d"
+dependencies = [
+ "futures-core",
+ "futures-task",
+ "futures-util",
+]
+
+[[package]]
+name = "futures-io"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718"
+
+[[package]]
+name = "futures-macro"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "futures-sink"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893"
+
+[[package]]
+name = "futures-task"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393"
+
+[[package]]
+name = "futures-util"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6"
+dependencies = [
+ "futures-channel",
+ "futures-core",
+ "futures-io",
+ "futures-macro",
+ "futures-sink",
+ "futures-task",
+ "memchr",
+ "pin-project-lite",
+ "slab",
+]
+
[[package]]
name = "getrandom"
version = "0.2.17"
@@ -896,6 +1049,12 @@ dependencies = [
"png",
]
+[[package]]
+name = "index_list"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "30141a73bc8a129ac1ce472e33f45af3e2091d86b3479061b9c2f92fdbe9a28c"
+
[[package]]
name = "indexmap"
version = "2.13.0"
@@ -927,6 +1086,15 @@ version = "1.70.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
+[[package]]
+name = "itertools"
+version = "0.10.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
+dependencies = [
+ "either",
+]
+
[[package]]
name = "itertools"
version = "0.12.1"
@@ -1016,6 +1184,24 @@ version = "0.2.184"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4af"
+[[package]]
+name = "libdeflate-sys"
+version = "1.25.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72753e0008ea87963d2f0770042d0df7abe51fafbb8dcaf618ac440f2f1fec0a"
+dependencies = [
+ "cc",
+]
+
+[[package]]
+name = "libdeflater"
+version = "1.25.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d1ee41cf6fb1bb6030dfb59ffb7bc01ab26aade44142084c87f0fc7a1658fe71"
+dependencies = [
+ "libdeflate-sys",
+]
+
[[package]]
name = "libloading"
version = "0.8.9"
@@ -1054,6 +1240,12 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfae20f6b19ad527b550c223fddc3077a547fc70cda94b9b566575423fd303ee"
+[[package]]
+name = "linux-raw-sys"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53"
+
[[package]]
name = "litemap"
version = "0.8.1"
@@ -1198,6 +1390,12 @@ version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
+[[package]]
+name = "pin-project-lite"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
+
[[package]]
name = "pkg-config"
version = "0.3.32"
@@ -1475,11 +1673,25 @@ dependencies = [
"semver 1.0.27",
]
+[[package]]
+name = "rustix"
+version = "1.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190"
+dependencies = [
+ "bitflags 2.11.0",
+ "errno",
+ "libc",
+ "linux-raw-sys",
+ "windows-sys 0.61.2",
+]
+
[[package]]
name = "rustqc"
version = "0.2.1"
dependencies = [
"anyhow",
+ "bigtools",
"cc",
"clap",
"coitrees",
@@ -1502,6 +1714,7 @@ dependencies = [
"serde",
"serde_json",
"serde_yaml_ng",
+ "tokio",
]
[[package]]
@@ -1605,6 +1818,12 @@ version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214"
+[[package]]
+name = "slab"
+version = "0.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5"
+
[[package]]
name = "smallvec"
version = "1.15.1"
@@ -1658,6 +1877,19 @@ dependencies = [
"syn",
]
+[[package]]
+name = "tempfile"
+version = "3.27.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd"
+dependencies = [
+ "fastrand",
+ "getrandom 0.4.2",
+ "once_cell",
+ "rustix",
+ "windows-sys 0.61.2",
+]
+
[[package]]
name = "thiserror"
version = "1.0.69"
@@ -1708,6 +1940,15 @@ dependencies = [
"zerovec",
]
+[[package]]
+name = "tokio"
+version = "1.52.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fc7f01b389ac15039e4dc9531aa973a135d7a4135281b12d7c1bc79fd57fffe"
+dependencies = [
+ "pin-project-lite",
+]
+
[[package]]
name = "ttf-parser"
version = "0.20.0"
diff --git a/Cargo.toml b/Cargo.toml
index ebf02f0..b3155e4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -64,6 +64,10 @@ indicatif = "0.17"
number_prefix = "0.4"
serde_json = "1"
+# bigWig writing (bedGraphToBigWig equivalent)
+bigtools = { version = "0.5", default-features = false, features = ["write"] }
+tokio = { version = "1", features = ["rt-multi-thread"] }
+
[build-dependencies]
cc = "1"
diff --git a/README.md b/README.md
index f576fc7..65dc431 100644
--- a/README.md
+++ b/README.md
@@ -28,7 +28,7 @@
-
+
@@ -52,11 +52,12 @@ It currently includes:
| TIN | [RSeQC](https://rseqc.sourceforge.net/#tin-py) `tin.py` | Transcript Integrity Number |
| preseq | [preseq](http://smithlabresearch.org/software/preseq/) `lc_extrap` | Library complexity extrapolation |
| Qualimap rnaseq | [Qualimap](http://qualimap.conesalab.org/) `rnaseq` | Gene body coverage, read origin, strand specificity |
+| bigWig | [nf-core/rnaseq](https://nf-co.re/rnaseq) `bedtools genomecov` + UCSC utilities | Genome-wide coverage tracks (combined and per-strand) |
| flagstat | [samtools](http://www.htslib.org/) `flagstat` | Alignment flag summary |
| idxstats | [samtools](http://www.htslib.org/) `idxstats` | Per-chromosome read counts |
| stats | [samtools](http://www.htslib.org/) `stats` | Full samtools stats output including all histogram sections |
-All outputs are format- and numerically identical to the upstream tools, and compatible with [MultiQC](https://multiqc.info/) for reporting.
+Most outputs are format- and numerically identical to the upstream tools, and compatible with [MultiQC](https://multiqc.info/) for reporting. bigWig coverage intervals match [bedtools](https://github.com/arq5x/bedtools2) `genomecov` exactly; binary `.bigWig` files use a different encoder but decode to the same coverage (see [bigWig docs](https://seqeralabs.github.io/RustQC/rna/bigwig/)).
## Quick start
@@ -84,7 +85,7 @@ See the [documentation](https://seqeralabs.github.io/RustQC/) for full usage det
## Use as a Rust library
-The crate is also published as a library, so the QC analysis modules (GTF parsing, dupRadar, featureCounts, RSeQC, Qualimap, preseq, samtools-style outputs) can be embedded into other Rust programs:
+The crate is also published as a library, so the QC analysis modules (GTF parsing, dupRadar, featureCounts, RSeQC, Qualimap, bigWig, preseq, samtools-style outputs) can be embedded into other Rust programs:
```toml
[dependencies]
diff --git a/docs/astro.config.mjs b/docs/astro.config.mjs
index 4e1c2f4..01a7fca 100644
--- a/docs/astro.config.mjs
+++ b/docs/astro.config.mjs
@@ -68,6 +68,7 @@ export default defineConfig({
{ label: "featureCounts", slug: "rna/featurecounts" },
{ label: "RSeQC", slug: "rna/rseqc" },
{ label: "Qualimap", slug: "rna/qualimap" },
+ { label: "bigWig", slug: "rna/bigwig" },
{ label: "Preseq", slug: "rna/preseq" },
{ label: "Samtools", slug: "rna/samtools" },
],
diff --git a/docs/src/content/docs/about/credits.mdx b/docs/src/content/docs/about/credits.mdx
index 171086e..75bb8a8 100644
--- a/docs/src/content/docs/about/credits.mdx
+++ b/docs/src/content/docs/about/credits.mdx
@@ -98,6 +98,22 @@ Qualimap rnaseq report.
- Website: [qualimap.conesalab.org](http://qualimap.conesalab.org/)
- Publication: [doi.org/10.1093/bioinformatics/bts503](https://doi.org/10.1093/bioinformatics/bts503)
+## bigWig coverage tracks
+
+RustQC produces nf-core/rnaseq-compatible genome-wide bigWig coverage tracks,
+matching the parameters used by [bedtools](https://github.com/arq5x/bedtools2)
+`genomecov` and UCSC `bedClip` / `bedGraphToBigWig`. See the
+[bigWig](../rna/bigwig/) page for output files, strandedness behaviour, and
+validation details.
+
+> Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing
+> genomic features. *Bioinformatics*. 2010;26(6):841-842.
+
+- bedtools: [github.com/arq5x/bedtools2](https://github.com/arq5x/bedtools2)
+- Publication: [doi.org/10.1093/bioinformatics/btq033](https://doi.org/10.1093/bioinformatics/btq033)
+- bigWig format (UCSC): Kent WJ, et al. The human genome browser at UCSC.
+ *Genome Research*. 2002;12(6):996-1006.
+
## Key dependencies
RustQC is built with the following open-source Rust libraries:
@@ -114,6 +130,8 @@ RustQC is built with the following open-source Rust libraries:
| [flate2](https://github.com/rust-lang/flate2-rs) | Gzip decompression for annotation files |
| [serde](https://github.com/serde-rs/serde) | YAML configuration deserialization |
| [indexmap](https://github.com/indexmap-rs/indexmap) | Insertion-order-preserving maps |
+| [bigtools](https://github.com/jackh726/bigtools) | bigWig and BigBed file I/O |
+| [tokio](https://github.com/tokio-rs/tokio) | Async runtime (bigWig writing) |
| [env_logger](https://github.com/rust-cli/env_logger) | Logging output |
## RustQC
diff --git a/docs/src/content/docs/getting-started/introduction.mdx b/docs/src/content/docs/getting-started/introduction.mdx
index e65f65c..63deb95 100644
--- a/docs/src/content/docs/getting-started/introduction.mdx
+++ b/docs/src/content/docs/getting-started/introduction.mdx
@@ -13,6 +13,7 @@ RustQC is a fast quality control toolkit for sequencing data, written in Rust. I
- [preseq](https://github.com/smithlabcode/preseq): library complexity extrapolation (lc_extrap)
- [samtools](http://www.htslib.org/): flagstat, idxstats, and full stats output including all histogram sections
- [Qualimap](http://qualimap.conesalab.org/): gene body coverage profiling and RNA-seq QC summary
+- [nf-core/rnaseq](https://nf-co.re/rnaseq) bigWig: genome-wide coverage tracks (`bedtools genomecov` + UCSC utilities)
## Why RustQC?
@@ -41,6 +42,7 @@ Given a duplicate-marked BAM file and a GTF annotation, `rustqc rna` runs all of
| dupRadar | [dupRadar](https://bioconductor.org/packages/dupRadar/) | RNA-seq PCR duplicate rate analysis |
| featureCounts | [featureCounts](http://subread.sourceforge.net/) | Gene-level read counting and biotypes |
| Qualimap rnaseq | [Qualimap](http://qualimap.conesalab.org/) rnaseq | Gene body coverage and RNA-seq QC |
+| bigWig | [nf-core/rnaseq](https://nf-co.re/rnaseq) `bedtools genomecov` | Genome-wide coverage tracks |
| preseq | [preseq](http://smithlabresearch.org/software/preseq/) `lc_extrap` | Library complexity extrapolation |
| flagstat | `samtools flagstat` | Alignment flag statistics |
| idxstats | `samtools idxstats` | Per-chromosome read counts |
diff --git a/docs/src/content/docs/getting-started/quickstart.md b/docs/src/content/docs/getting-started/quickstart.md
index 1f3dd84..99afe7a 100644
--- a/docs/src/content/docs/getting-started/quickstart.md
+++ b/docs/src/content/docs/getting-started/quickstart.md
@@ -7,7 +7,7 @@ A basic RustQC analysis from install to results.
## RNA-seq duplicate analysis
-Run all RNA-seq QC analyses (dupRadar, featureCounts, RSeQC tools including TIN, Qualimap, preseq, and samtools) in a single pass:
+Run all RNA-seq QC analyses (dupRadar, featureCounts, RSeQC tools including TIN, Qualimap, bigWig coverage tracks, preseq, and samtools) in a single pass:
```bash
rustqc rna sample.markdup.bam --gtf genes.gtf -p -o results/
@@ -27,7 +27,8 @@ This command:
### Output
-Output files are organized into subdirectories by tool group.
+Output files are organized into subdirectories by tool group
+(for example `dupradar/`, `rseqc/`, `qualimap/`, `bigwig/`, `preseq/`, and `samtools/`).
Files are generally named the same as their upstream tool equivalents.
This means that MultiQC should find them and report them as if they were created
by the original tool.
diff --git a/docs/src/content/docs/index.mdx b/docs/src/content/docs/index.mdx
index 54afb33..24c2170 100644
--- a/docs/src/content/docs/index.mdx
+++ b/docs/src/content/docs/index.mdx
@@ -27,6 +27,7 @@ export const base = import.meta.env.BASE_URL;
featureCounts ,{" "}
RSeQC ,{" "}
Qualimap ,{" "}
+ nf-core/rnaseq bigWig ,{" "}
preseq , and{" "}
samtools in Rust, delivering
matching results much faster.
@@ -163,7 +164,8 @@ export const base = import.meta.env.BASE_URL;
Produces identical or near-identical output to R's dupRadar and featureCounts,
plus faithful reimplementations of 8 RSeQC tools (including TIN), preseq,
- and samtools. See [benchmark details](./rna/benchmark-details/).
+ samtools, and nf-core/rnaseq-compatible bigWig coverage tracks.
+ See [benchmark details](./rna/benchmark-details/).
Processes ~186M reads in 14m 54s on AWS vs ~15h 34m of sequential tool
@@ -171,7 +173,7 @@ export const base = import.meta.env.BASE_URL;
workflow.
- Replaces 14+ separate QC tool invocations with a single CLI command.
+ Replaces 15+ separate QC tool invocations with a single CLI command.
One task to stage data for. One pass through the BAM file.
Minimal staging and I/O overhead.
@@ -183,5 +185,5 @@ export const base = import.meta.env.BASE_URL;
## Credits
-RustQC reimplements established tools including dupRadar, featureCounts, RSeQC, preseq, samtools, and Qualimap.
+RustQC reimplements established tools including dupRadar, featureCounts, RSeQC, preseq, samtools, Qualimap, and nf-core/rnaseq bigWig coverage tracks.
See the [Credits & Citations](./about/credits/) page for the full list and how to cite them.
diff --git a/docs/src/content/docs/rna/benchmark-details.mdx b/docs/src/content/docs/rna/benchmark-details.mdx
index 8594e00..a3ec3e0 100644
--- a/docs/src/content/docs/rna/benchmark-details.mdx
+++ b/docs/src/content/docs/rna/benchmark-details.mdx
@@ -45,7 +45,8 @@ across chromosomes.
Per-tool comparison tables and known differences are on the individual tool pages:
[dupRadar](../dupradar/), [featureCounts](../featurecounts/), [RSeQC](../rseqc/),
-[preseq](../preseq/), [Samtools](../samtools/), [Qualimap](../qualimap/).
+[preseq](../preseq/), [Samtools](../samtools/), [Qualimap](../qualimap/),
+[bigWig](../bigwig/).
## Upstream tool versions
@@ -59,6 +60,7 @@ RustQC output was validated against these specific versions:
| preseq | 3.2.0 |
| samtools | 1.22.1 |
| Qualimap | 2.3 |
+| bedtools | 2.31.1 |
Exact container tags and commands are in the
[RustQC-benchmarks](https://github.com/seqeralabs/RustQC-benchmarks) Nextflow pipeline.
diff --git a/docs/src/content/docs/rna/bigwig.mdx b/docs/src/content/docs/rna/bigwig.mdx
new file mode 100644
index 0000000..ed29b50
--- /dev/null
+++ b/docs/src/content/docs/rna/bigwig.mdx
@@ -0,0 +1,149 @@
+---
+title: bigWig Coverage Tracks
+description: Genome-wide bigWig coverage tracks compatible with nf-core/rnaseq, generated in the RustQC streaming pass.
+---
+
+import { Aside, FileTree } from "@astrojs/starlight/components";
+
+
+The [nf-core/rnaseq](https://nf-co.re/rnaseq) pipeline generates genome-wide
+coverage tracks with `bedtools genomecov`, UCSC `bedClip`, and
+`bedGraphToBigWig`. RustQC produces equivalent coverage in the same BAM
+streaming pass — no separate bedGraph round-trips.
+
+[nf-core/rnaseq bigWig docs](https://nf-co.re/rnaseq/latest/docs/output/#bigwig-files) | [bedtools genomecov](https://bedtools.readthedocs.io/en/latest/content/tools/genomecov.html)
+
+
+RustQC generates **bigWig** genome coverage tracks during the `rustqc rna`
+single-pass BAM scan. The implementation replicates the parameters used by
+[nf-core/rnaseq](https://nf-co.re/rnaseq) for its `bigwig/` output directory.
+
+## What it replaces
+
+In nf-core/rnaseq, each sample typically runs three sequential steps per strand:
+
+1. `bedtools genomecov` on the final genome BAM → sorted bedGraph
+2. `bedClip` with chromosome sizes → clipped bedGraph
+3. `bedGraphToBigWig` → `.bigWig` file
+
+RustQC collapses this into the existing BAM read loop. Coverage is accumulated
+with the same `bedtools genomecov` logic (`-split`, optional `-du` and
+`-strand`), clipped to chromosome boundaries (UCSC `bedClip` semantics), and
+written as bigWig via the [`bigtools`](https://github.com/jackh726/bigtools)
+crate.
+
+
+bigWig tracks are generated during the same streaming pass as dupRadar,
+featureCounts, RSeQC, Qualimap, preseq, and samtools outputs.
+
+
+## Output files
+
+All bigWig files are written to a `bigwig/` subdirectory under the output
+directory. Use `--flat-output` to write files directly to the top-level output
+directory instead.
+
+
+- bigwig/
+ - sample.bigWig Combined strand-agnostic coverage (all libraries)
+ - sample.forward.bigWig Forward-genome-strand coverage (stranded libraries only)
+ - sample.reverse.bigWig Reverse-genome-strand coverage (stranded libraries only)
+
+
+File naming matches nf-core/rnaseq: `{sample_id}.bigWig`,
+`{sample_id}.forward.bigWig`, and `{sample_id}.reverse.bigWig`.
+
+### Combined track (all libraries)
+
+**File:** `.bigWig`
+
+Equivalent to nf-core `BEDTOOLS_GENOMECOV_COMBINED`:
+
+```bash
+bedtools genomecov -ibam sample.bam -split -bg
+```
+
+### Per-strand tracks (stranded libraries only)
+
+When `--stranded forward` or `--stranded reverse` is set (or configured in
+YAML), RustQC also writes forward- and reverse-genome-strand tracks using
+`-split -du -strand +/-` with strand labels swapped for reverse libraries, matching
+nf-core/rnaseq `bigwig.config`.
+
+| Library type | Forward track file | bedtools strand filter |
+| ------------ | ------------------ | ---------------------- |
+| `forward` | `.forward.bigWig` | `-strand +` |
+| `forward` | `.reverse.bigWig` | `-strand -` |
+| `reverse` | `.forward.bigWig` | `-strand -` |
+| `reverse` | `.reverse.bigWig` | `-strand +` |
+
+For **unstranded** libraries, only the combined `{sample}.bigWig` is produced.
+Per-strand tracks are omitted because strand-specific filtering is not
+meaningful without a known library protocol.
+
+
+If a per-strand track has no coverage intervals (for example, the reverse track
+when all reads align to the forward strand), RustQC skips writing that file
+rather than emitting an empty bigWig stub.
+
+
+## Parameters
+
+RustQC matches nf-core/rnaseq defaults:
+
+| Parameter | nf-core/rnaseq | RustQC |
+| --------- | -------------- | ------ |
+| Normalization | None (`scale=1`) | None |
+| Splicing | `-split` | `-split` (CIGAR N/D block splitting) |
+| PE deduplication | `-du` on per-strand tracks only | `-du` on per-strand tracks only |
+| MAPQ filter | None (all mapped reads) | None (all mapped reads) |
+| Clipping | `bedClip` with chrom sizes | Equivalent clipping before write |
+
+Input must be a **coordinate-sorted** BAM (or SAM/CRAM). No GTF annotation is
+used for coverage calculation itself, but `--gtf` is still required by the
+`rustqc rna` command.
+
+## Validation
+
+Coverage intervals match [bedtools](https://github.com/arq5x/bedtools2)
+`genomecov` v2.31.1 with UCSC `bedClip` semantics. Integration tests decode
+both RustQC and reference bigWig files with UCSC `bigWigToBedGraph` and compare
+all bedGraph intervals exactly.
+
+The **binary `.bigWig` files are not bit-identical** to UCSC `bedGraphToBigWig`
+output because RustQC encodes them with `bigtools` rather than the UCSC
+utility. Decoded coverage values and interval boundaries are identical; only
+the on-disk compression and index structure differ.
+
+## Configuration
+
+bigWig generation is enabled by default. Disable with:
+
+```yaml
+rna:
+ bigwig:
+ enabled: false
+```
+
+See the [Configuration](../usage/configuration/#bigwig) page for details.
+
+## nf-core/rnaseq integration
+
+To use RustQC bigWig output in place of the nf-core/rnaseq bigWig subworkflow,
+run `rustqc rna` on the final genome BAM and publish the `bigwig/` directory
+to `{outdir}/{aligner}/bigwig/` (the path nf-core uses for MultiQC and
+downstream consumers).
+
+Set `--stranded` to match the library protocol so per-strand tracks are
+generated with the correct nf-core naming and strand filters.
+
+## References
+
+- **bedtools:** Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities
+ for comparing genomic features. *Bioinformatics*. 2010;26(6):841-842.
+ [bedtools on GitHub](https://github.com/arq5x/bedtools2)
+- **bigWig format:** Kent WJ, et al. The human genome browser at UCSC.
+ *Genome Research*. 2002;12(6):996-1006.
+- **bigtools:** Ramírez JM, et al. Bigtools: a high-performance BigWig and
+ BigBed library in Rust. *Bioinformatics*. 2024.
+ [bigtools on GitHub](https://github.com/jackh726/bigtools)
diff --git a/docs/src/content/docs/usage/cli-reference.mdx b/docs/src/content/docs/usage/cli-reference.mdx
index deca6b4..8e4cf2c 100644
--- a/docs/src/content/docs/usage/cli-reference.mdx
+++ b/docs/src/content/docs/usage/cli-reference.mdx
@@ -16,7 +16,7 @@ RNA-seq quality control: duplicate rate analysis (dupRadar equivalent),
featureCounts-compatible read counting with biotype summaries, 8
RSeQC-equivalent tools (bam_stat, infer_experiment, read_duplication,
read_distribution, junction_annotation, junction_saturation, inner_distance,
-TIN), Qualimap RNA-seq QC, preseq library complexity, and samtools-compatible
+TIN), Qualimap RNA-seq QC, bigWig genome coverage tracks, preseq library complexity, and samtools-compatible
outputs.
### Synopsis
@@ -54,7 +54,7 @@ Path to a GTF gene annotation file (plain or gzip-compressed). **Required.**
The GTF must contain `exon` features with a `gene_id` attribute.
Transcript-level structure (exon blocks, CDS features) is extracted automatically
and used by all analyses: dupRadar, featureCounts, all 8 RSeQC tools (including TIN),
-Qualimap, preseq, and samtools.
+Qualimap, bigWig coverage tracks, preseq, and samtools.
Gzip compression is detected automatically by inspecting the file header (magic
bytes), so the `.gz` extension is not required.
@@ -79,6 +79,11 @@ Library strandedness for strand-aware read counting:
**Default:** `unstranded`
+For **bigWig coverage tracks**, stranded libraries also produce
+`{sample}.forward.bigWig` and `{sample}.reverse.bigWig` with nf-core/rnaseq-
+compatible strand filters. Unstranded libraries receive only `{sample}.bigWig`.
+See [bigWig](../rna/bigwig/) for details.
+
RustQC automatically checks this value against the
[infer_experiment](../rna/rseqc/#infer_experiment) results. If there is a
@@ -159,7 +164,7 @@ rustqc rna sample.markdup.sorted.bam --gtf genes.gtf --sample-name sample
Write all output files directly into the output directory instead of organizing
them into subdirectories. By default, RustQC creates `dupradar/`,
-`featurecounts/`, `rseqc//`, `qualimap/`, `preseq/`, and `samtools/`
+`featurecounts/`, `rseqc//`, `qualimap/`, `bigwig/`, `preseq/`, and `samtools/`
subdirectories under the output directory. With `--flat-output`, all files are
written to the top-level output directory.
diff --git a/docs/src/content/docs/usage/configuration.md b/docs/src/content/docs/usage/configuration.md
index a6c21d7..0d20506 100644
--- a/docs/src/content/docs/usage/configuration.md
+++ b/docs/src/content/docs/usage/configuration.md
@@ -19,7 +19,7 @@ settings live under a top-level `rna:` key, matching the `rustqc rna` subcommand
:::note
The `rna:` section controls all tools: dupRadar, featureCounts, all 8 RSeQC tools
-(including TIN), Qualimap, preseq, and samtools. Each tool has an `enabled` toggle
+(including TIN), Qualimap, bigWig coverage tracks, preseq, and samtools. Each tool has an `enabled` toggle
and tool-specific parameter overrides where applicable.
CLI flags take precedence over config file values.
:::
@@ -235,6 +235,10 @@ rna:
qualimap:
enabled: true
+ # bigWig genome coverage tracks (nf-core/rnaseq compatible)
+ bigwig:
+ enabled: true
+
# Library complexity (preseq lc_extrap)
preseq:
enabled: true
@@ -537,6 +541,32 @@ read origin classification (exonic/intronic/intergenic), strand-specificity
estimation, and splice junction motif counting. Produces Qualimap-compatible
output files parseable by MultiQC.
+## bigwig
+
+```yaml
+rna:
+ bigwig:
+ enabled: true # Set to false to skip bigWig coverage track generation
+```
+
+Generates nf-core/rnaseq-compatible genome-wide bigWig coverage tracks in the
+`bigwig/` subdirectory. No GTF annotation is used for coverage calculation, but
+`--gtf` is still required by the `rustqc rna` command.
+
+Produces:
+
+- `{sample}.bigWig` — combined strand-agnostic track (`bedtools genomecov -split -bg`)
+- `{sample}.forward.bigWig` and `{sample}.reverse.bigWig` — per-strand tracks
+ for stranded libraries (`-split -du -strand +/-`), with strand labels matching
+ nf-core/rnaseq conventions
+
+Set `--stranded forward` or `--stranded reverse` (or configure `stranded:` in
+YAML) to enable per-strand tracks. Unstranded libraries receive only the
+combined track.
+
+See [bigWig Coverage Tracks](../rna/bigwig/) for full details on parameters,
+validation, and nf-core integration.
+
## preseq
```yaml
diff --git a/docs/src/content/docs/usage/library.mdx b/docs/src/content/docs/usage/library.mdx
index 22215be..ad5046c 100644
--- a/docs/src/content/docs/usage/library.mdx
+++ b/docs/src/content/docs/usage/library.mdx
@@ -35,7 +35,7 @@ The crate exposes these modules:
| [`config`][docs-config] | Configuration types mirroring the CLI's YAML config file. |
| [`summary`][docs-summary] | Serializable types for the JSON run summary. |
| [`cpu`][docs-cpu] | CPU feature detection and binary-target identification. |
-| [`rna`][docs-rna] | RNA-Seq analyses: `dupradar`, `featurecounts`, `qualimap`, `preseq`, `rseqc`. |
+| [`rna`][docs-rna] | RNA-Seq analyses: `dupradar`, `featurecounts`, `qualimap`, `bigwig`, `preseq`, `rseqc`. |
[`Strandedness`][docs-strandedness] lives at the crate root because it is used
across most analysis modules.
diff --git a/src/citations.rs b/src/citations.rs
index 316ed4d..08de7a5 100644
--- a/src/citations.rs
+++ b/src/citations.rs
@@ -64,6 +64,14 @@ const QUALIMAP: Citation = Citation {
doi: "10.1093/bioinformatics/bts503",
};
+const BIGWIG: Citation = Citation {
+ heading: "bedtools genomecov (v2.31.1) + UCSC utilities",
+ description: "RustQC produces nf-core/rnaseq-compatible bigWig coverage tracks (bedtools genomecov, bedClip, bedGraphToBigWig).",
+ reference: "Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. *Bioinformatics*. 2010;26(6):841-842. Kent WJ, et al. The human genome browser at UCSC. *Genome Res*. 2002;12(6):996-1006.",
+ url: "https://github.com/arq5x/bedtools2",
+ doi: "10.1093/bioinformatics/btq033",
+};
+
fn write_citation(w: &mut impl Write, c: &Citation) -> std::io::Result<()> {
writeln!(w, "## {}\n", c.heading)?;
writeln!(w, "{}\n", c.description)?;
@@ -113,6 +121,9 @@ pub fn write_citations(path: &Path, config: &RnaConfig, version: &str, commit: &
if config.qualimap.enabled {
write_citation(&mut w, &QUALIMAP)?;
}
+ if config.bigwig.enabled {
+ write_citation(&mut w, &BIGWIG)?;
+ }
w.flush()?;
Ok(())
diff --git a/src/cli.rs b/src/cli.rs
index 6e6459e..2f9df7b 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -5,8 +5,9 @@
//! compatible output, RSeQC-equivalent metrics (bam_stat, infer_experiment,
//! read_duplication, read_distribution, junction_annotation, junction_saturation,
//! inner_distance), TIN (Transcript Integrity Number), preseq library complexity
-//! extrapolation, samtools-compatible outputs (flagstat, idxstats, stats), and
-//! Qualimap RNA-seq QC. Individual tools can be disabled via the YAML config file.
+//! extrapolation, samtools-compatible outputs (flagstat, idxstats, stats),
+//! Qualimap RNA-seq QC, and nf-core/rnaseq-compatible bigWig coverage tracks.
+//! Individual tools can be disabled via the YAML config file.
//!
//! A GTF gene annotation file is required for all analyses.
@@ -28,7 +29,7 @@ pub struct Cli {
pub enum Commands {
/// RNA-Seq QC — single-pass analysis of BAM/SAM/CRAM files.
///
- /// Runs featureCounts, dupRadar, Qualimap, samtools stats, and RSeQC
+ /// Runs featureCounts, dupRadar, Qualimap, bigWig coverage tracks, samtools stats, and RSeQC
/// analyses in one pass. Requires a GTF annotation and duplicate-marked
/// (not removed) alignments.
Rna(RnaArgs),
diff --git a/src/config.rs b/src/config.rs
index 4952a1f..62753c5 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -165,6 +165,10 @@ pub struct RnaConfig {
/// Qualimap RNA-Seq QC configuration.
#[serde(default)]
pub qualimap: QualimapConfig,
+
+ /// bigWig genome coverage track configuration.
+ #[serde(default)]
+ pub bigwig: BigwigConfig,
}
// ============================================================================
@@ -600,6 +604,30 @@ impl Default for QualimapConfig {
}
}
+/// Configuration for bigWig genome coverage tracks.
+///
+/// When enabled, produces nf-core/rnaseq-compatible bigWig files using
+/// bedtools genomecov-equivalent parameters (`-split -bg`, plus per-strand
+/// `-split -du -strand +/- -bg` for stranded libraries).
+///
+/// Example:
+/// ```yaml
+/// bigwig:
+/// enabled: true
+/// ```
+#[derive(Debug, Deserialize)]
+#[serde(default)]
+pub struct BigwigConfig {
+ /// Whether to generate bigWig coverage tracks. Defaults to true.
+ pub enabled: bool,
+}
+
+impl Default for BigwigConfig {
+ fn default() -> Self {
+ Self { enabled: true }
+ }
+}
+
/// Configuration for samtools idxstats-compatible output.
///
/// When enabled, produces a file matching `samtools idxstats` output format,
diff --git a/src/lib.rs b/src/lib.rs
index 9a228ca..60374e8 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,7 +2,7 @@
//!
//! RustQC is primarily a CLI (`rustqc rna ...`) that runs a single-pass
//! RNA-Seq QC pipeline (dupRadar, featureCounts, 8 RSeQC tools, Qualimap,
-//! preseq, samtools-style outputs). The same analysis modules are also
+//! bigWig coverage tracks, preseq, samtools-style outputs). The same analysis modules are also
//! exposed as a library so they can be embedded into other Rust programs.
//!
//! # Adding RustQC as a dependency
@@ -25,7 +25,7 @@
//! - [`cpu`] — CPU feature detection and binary-target identification.
//! - [`rna`] — the RNA-Seq analysis modules:
//! - [`rna::dupradar`], [`rna::featurecounts`], [`rna::qualimap`],
-//! [`rna::preseq`], [`rna::rseqc`].
+//! [`rna::bigwig`], [`rna::preseq`], [`rna::rseqc`].
//!
//! [`Strandedness`] lives at the crate root because it is used across most
//! analysis modules.
diff --git a/src/main.rs b/src/main.rs
index 4c66c17..dddb5dc 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -6,7 +6,8 @@
//! 8 RSeQC-equivalent tools (bam_stat, infer_experiment, read_duplication,
//! read_distribution, junction_annotation, junction_saturation, inner_distance, TIN),
//! preseq library complexity extrapolation, samtools-compatible outputs
-//! (flagstat, idxstats, stats), and Qualimap gene body coverage profiling.
+//! (flagstat, idxstats, stats), Qualimap gene body coverage profiling, and
+//! nf-core/rnaseq-compatible bigWig coverage tracks.
//! Individual tools can be disabled via the YAML config file.
mod citations;
@@ -1043,6 +1044,7 @@ fn process_single_bam(
None
},
qualimap_index.as_ref(),
+ config.bigwig.enabled,
Some(&pb),
)?;
let count_duration = count_start.elapsed();
@@ -1421,6 +1423,29 @@ fn process_single_bam(
written_outputs.push(("Qualimap".into(), p));
}
+ // === bigWig genome coverage tracks ===
+ if let Some(ref gc_result) = count_result.genomecov {
+ let bw_dir = if params.flat_output {
+ outdir.to_path_buf()
+ } else {
+ outdir.join("bigwig")
+ };
+
+ // Chromosome sizes are carried through from the count pass (BAM header
+ // order), so there is no need to re-open the BAM here.
+ let bw_outputs = rna::bigwig::write_bigwig_tracks(
+ &bw_dir,
+ &sample_name,
+ gc_result,
+ &gc_result.chrom_sizes,
+ threads,
+ )?;
+ for (label, path) in bw_outputs {
+ ui.output_item(&label, &path);
+ written_outputs.push((label, path));
+ }
+ }
+
// === RSeQC analyses (post-processing of single-pass accumulators) ===
let rseqc_accums = rseqc_accums.unwrap_or_else(|| {
ui.detail("No RSeQC tools enabled, skipping");
diff --git a/src/rna/bigwig/accumulator.rs b/src/rna/bigwig/accumulator.rs
new file mode 100644
index 0000000..33a214b
--- /dev/null
+++ b/src/rna/bigwig/accumulator.rs
@@ -0,0 +1,512 @@
+//! Genome coverage accumulation matching `bedtools genomecov`.
+//!
+//! Replicates nf-core/rnaseq bigWig generation parameters:
+//! - Combined track: `-split -bg`
+//! - Per-strand tracks (stranded libraries): `-split -du -strand +/- -bg`
+//!
+//! # Streaming, memory-bounded design
+//!
+//! A naive difference array allocates one counter per genome base, which for a
+//! mammalian genome is tens of GB per track (and the parallel path keeps every
+//! chromosome resident at once). Instead, because the input BAM is
+//! coordinate-sorted, each chromosome's reads arrive in ascending start order.
+//! We keep only a small sparse map of pending coverage deltas and a sweep
+//! cursor: once the cursor passes a position, no future read can change
+//! coverage there, so we emit the finished bedGraph intervals and drop the
+//! deltas below the cursor. Peak memory is therefore bounded by the widest
+//! single-read genomic span (introns included), not by chromosome length or
+//! sequencing depth.
+//!
+//! The emitted bedGraph is bit-for-bit identical to the equivalent dense
+//! difference-array sweep (see the brute-force equivalence tests below), which
+//! in turn matches `bedtools genomecov`.
+
+use std::collections::BTreeMap;
+
+use rust_htslib::bam;
+use rust_htslib::bam::record::Cigar;
+
+use crate::rna::bam_flags::BAM_FREAD2;
+use crate::Strandedness;
+
+/// Strand filter for per-strand coverage tracks.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum StrandFilter {
+ /// Forward reference strand (`-strand +`).
+ Plus,
+ /// Reverse reference strand (`-strand -`).
+ Minus,
+}
+
+/// Settings for a single coverage track.
+///
+/// All nf-core/rnaseq tracks use `-split` (reads are always split at splicing
+/// blocks), so splitting is unconditional rather than a toggle.
+#[derive(Debug, Clone, Copy, Default)]
+struct TrackSettings {
+ /// Flip second-mate strand for strand-specific libraries (`-du`).
+ deduplicate_strand: bool,
+ /// Optional strand filter (`-strand`).
+ strand: Option,
+}
+
+/// A single bedGraph interval: `(chrom, start, end, depth)` with `depth > 0`.
+type BedGraphEntry = (String, u32, u32, u32);
+
+/// Streaming coverage accumulator for one track (combined, forward, or reverse).
+///
+/// Holds finalized bedGraph intervals plus the sweep state for the chromosome
+/// currently being processed. Reads must be supplied in coordinate-sorted order
+/// within each chromosome (guaranteed by the BAM read loop).
+#[derive(Debug, Default, Clone)]
+pub struct TrackAccum {
+ settings: TrackSettings,
+ /// Finalized bedGraph intervals for all completed chromosomes.
+ bedgraph: Vec,
+
+ // --- sweep state for the active chromosome ---
+ /// Name of the chromosome currently being accumulated (empty if none).
+ chrom_name: String,
+ /// Length of the active chromosome, used to clamp interval ends.
+ chrom_size: u32,
+ /// Pending coverage deltas at positions at or ahead of the sweep cursor.
+ /// `+1` at a block start, `-1` at a block's half-open end.
+ deltas: BTreeMap,
+ /// Start of the currently open (constant-depth) run.
+ cur_start: u32,
+ /// Coverage depth of the currently open run.
+ cur_depth: i64,
+}
+
+impl TrackAccum {
+ fn new(settings: TrackSettings) -> Self {
+ Self {
+ settings,
+ ..Default::default()
+ }
+ }
+
+ /// Begin accumulating a new chromosome, finalizing any previous one.
+ fn start_chrom(&mut self, name: &str, size: u64) {
+ self.finish_chrom();
+ self.chrom_name.clear();
+ self.chrom_name.push_str(name);
+ self.chrom_size = size.min(u32::MAX as u64) as u32;
+ self.cur_start = 0;
+ self.cur_depth = 0;
+ debug_assert!(self.deltas.is_empty());
+ }
+
+ /// Record a half-open alignment block `[start, end)`, clamped to the
+ /// chromosome, as a pair of coverage deltas.
+ fn add_block(&mut self, start: u64, end: u64) {
+ let size = self.chrom_size as u64;
+ if size == 0 || start >= size {
+ return;
+ }
+ let end = end.min(size);
+ if end <= start {
+ return;
+ }
+ *self.deltas.entry(start as u32).or_insert(0) += 1;
+ *self.deltas.entry(end as u32).or_insert(0) -= 1;
+ }
+
+ /// Apply and emit all deltas strictly below `pos`. Safe to call once the
+ /// read cursor has advanced past `pos`, since no later read can add
+ /// coverage below it.
+ fn flush_below(&mut self, pos: u32) {
+ let keep = self.deltas.split_off(&pos);
+ let flush = std::mem::replace(&mut self.deltas, keep);
+ self.consume(flush);
+ }
+
+ /// Finalize the active chromosome, emitting all remaining intervals.
+ fn finish_chrom(&mut self) {
+ if self.chrom_name.is_empty() {
+ return;
+ }
+ let flush = std::mem::take(&mut self.deltas);
+ self.consume(flush);
+ // A balanced difference array always returns to depth 0.
+ debug_assert_eq!(self.cur_depth, 0);
+ self.cur_depth = 0;
+ self.chrom_name.clear();
+ }
+
+ /// Drive the emit state machine over a run of (position, delta) pairs in
+ /// ascending position order. Only depth *changes* create interval
+ /// boundaries, so net-zero positions merge automatically (matching
+ /// bedtools' equal-depth interval merging).
+ fn consume(&mut self, deltas: BTreeMap) {
+ for (pos, delta) in deltas {
+ if delta == 0 {
+ continue;
+ }
+ if self.cur_depth > 0 {
+ self.bedgraph.push((
+ self.chrom_name.clone(),
+ self.cur_start,
+ pos,
+ self.cur_depth as u32,
+ ));
+ }
+ self.cur_start = pos;
+ self.cur_depth += delta as i64;
+ }
+ }
+
+ fn merge(&mut self, mut other: Self) {
+ debug_assert!(other.chrom_name.is_empty(), "merge of unfinished track");
+ self.bedgraph.append(&mut other.bedgraph);
+ }
+}
+
+/// Bundle of coverage accumulators for all nf-core/rnaseq bigWig tracks.
+#[derive(Debug, Clone)]
+pub struct GenomeCovAccum {
+ stranded: Strandedness,
+ /// tid of the chromosome currently being accumulated (-1 = none).
+ current_tid: i32,
+ /// Strand-agnostic combined coverage (`-split -bg`).
+ pub combined: TrackAccum,
+ /// Forward-genome-strand coverage (stranded libraries only).
+ pub forward: Option,
+ /// Reverse-genome-strand coverage (stranded libraries only).
+ pub reverse: Option,
+ /// Scratch buffer for split-read alignment blocks, reused across reads to
+ /// avoid a per-record heap allocation in the hot loop.
+ blocks_buf: Vec<(u64, u64)>,
+}
+
+impl GenomeCovAccum {
+ /// Create accumulators for the given strandedness.
+ ///
+ /// Per-strand tracks are only created for stranded libraries.
+ pub fn new(stranded: Strandedness) -> Self {
+ let combined = TrackAccum::new(TrackSettings {
+ deduplicate_strand: false,
+ strand: None,
+ });
+ let (forward, reverse) = if stranded == Strandedness::Unstranded {
+ (None, None)
+ } else {
+ let (fwd_strand, rev_strand) = match stranded {
+ Strandedness::Forward => (StrandFilter::Plus, StrandFilter::Minus),
+ Strandedness::Reverse => (StrandFilter::Minus, StrandFilter::Plus),
+ Strandedness::Unstranded => unreachable!(),
+ };
+ (
+ Some(TrackAccum::new(TrackSettings {
+ deduplicate_strand: true,
+ strand: Some(fwd_strand),
+ })),
+ Some(TrackAccum::new(TrackSettings {
+ deduplicate_strand: true,
+ strand: Some(rev_strand),
+ })),
+ )
+ };
+ Self {
+ stranded,
+ current_tid: -1,
+ combined,
+ forward,
+ reverse,
+ blocks_buf: Vec::new(),
+ }
+ }
+
+ /// Merge another worker's (finished) accumulators into this one.
+ pub fn merge(&mut self, other: Self) {
+ self.combined.merge(other.combined);
+ if let (Some(ref mut f), Some(o)) = (&mut self.forward, other.forward) {
+ f.merge(o);
+ }
+ if let (Some(ref mut r), Some(o)) = (&mut self.reverse, other.reverse) {
+ r.merge(o);
+ }
+ }
+
+ /// Finalize the active chromosome on every track. Must be called once all
+ /// reads have been processed, before merging or reading results.
+ pub fn finish(&mut self) {
+ self.combined.finish_chrom();
+ if let Some(ref mut f) = self.forward {
+ f.finish_chrom();
+ }
+ if let Some(ref mut r) = self.reverse {
+ r.finish_chrom();
+ }
+ self.current_tid = -1;
+ }
+
+ /// Process a mapped BAM record, updating all enabled tracks.
+ ///
+ /// `tid`/`name`/`size` identify the record's reference. Records must arrive
+ /// in coordinate-sorted order, grouped by chromosome.
+ pub fn process_read(&mut self, record: &bam::Record, tid: u32, name: &str, size: u64) {
+ if record.is_unmapped() {
+ return;
+ }
+
+ // Chromosome transition: finalize the previous chromosome and reset
+ // the sweep state on every track.
+ if self.current_tid != tid as i32 {
+ self.combined.start_chrom(name, size);
+ if let Some(ref mut f) = self.forward {
+ f.start_chrom(name, size);
+ }
+ if let Some(ref mut r) = self.reverse {
+ r.start_chrom(name, size);
+ }
+ self.current_tid = tid as i32;
+ }
+
+ // Extract alignment blocks once; strand filtering differs per track but
+ // the blocks themselves do not.
+ genomecov_blocks(record, &mut self.blocks_buf);
+ let pos = record.pos().max(0) as u32;
+
+ add_read_to_track(&mut self.combined, record, &self.blocks_buf);
+ if self.stranded != Strandedness::Unstranded {
+ if let Some(ref mut f) = self.forward {
+ add_read_to_track(f, record, &self.blocks_buf);
+ }
+ if let Some(ref mut r) = self.reverse {
+ add_read_to_track(r, record, &self.blocks_buf);
+ }
+ }
+
+ // Advance the sweep cursor: positions below the current read's start are
+ // now final and can be emitted, releasing their deltas.
+ self.combined.flush_below(pos);
+ if let Some(ref mut f) = self.forward {
+ f.flush_below(pos);
+ }
+ if let Some(ref mut r) = self.reverse {
+ r.flush_below(pos);
+ }
+ }
+}
+
+/// Apply a read's blocks to one track if it passes the track's strand filter.
+fn add_read_to_track(track: &mut TrackAccum, record: &bam::Record, blocks: &[(u64, u64)]) {
+ let mut is_reverse = record.is_reverse();
+ // bedtools `-du`: flip second mate strand so both mates appear on the
+ // same fragment strand (dUTP/strand-specific PE libraries).
+ if track.settings.deduplicate_strand
+ && record.is_paired()
+ && !record.is_mate_unmapped()
+ && record.flags() & BAM_FREAD2 != 0
+ {
+ is_reverse = !is_reverse;
+ }
+
+ if let Some(filter) = track.settings.strand {
+ let want_reverse = filter == StrandFilter::Minus;
+ if is_reverse != want_reverse {
+ return;
+ }
+ }
+
+ for &(start, end) in blocks {
+ track.add_block(start, end);
+ }
+}
+
+/// Extract alignment blocks for `-split` mode, matching bedtools `GetBamBlocks`
+/// with `breakOnDeletionOps=true` (default, no `-ignoreD`).
+fn genomecov_blocks(record: &bam::Record, blocks: &mut Vec<(u64, u64)>) {
+ blocks.clear();
+ let mut ref_pos = record.pos().max(0) as u64;
+
+ for op in record.cigar().iter() {
+ match op {
+ Cigar::Match(len) | Cigar::Equal(len) | Cigar::Diff(len) => {
+ let len = *len as u64;
+ blocks.push((ref_pos, ref_pos + len));
+ ref_pos += len;
+ }
+ Cigar::RefSkip(len) | Cigar::Del(len) => {
+ ref_pos += *len as u64;
+ }
+ Cigar::Ins(_) | Cigar::SoftClip(_) | Cigar::HardClip(_) | Cigar::Pad(_) => {}
+ }
+ }
+}
+
+/// Merged genome coverage ready for bedGraph / bigWig output.
+#[derive(Debug)]
+pub struct GenomeCovResult {
+ /// Combined strand-agnostic track.
+ pub combined: TrackAccum,
+ /// Forward-genome-strand track (None for unstranded libraries).
+ pub forward: Option,
+ /// Reverse-genome-strand track (None for unstranded libraries).
+ pub reverse: Option,
+ /// Chromosome names and lengths in BAM header order, carried through so
+ /// output does not need to re-open the BAM to recover them.
+ pub chrom_sizes: Vec<(String, u64)>,
+}
+
+impl GenomeCovAccum {
+ /// Finalize the accumulator into a result, attaching the BAM header-ordered
+ /// chromosome sizes used for bedGraph ordering and bigWig writing.
+ ///
+ /// [`finish`](Self::finish) must have been called first.
+ pub fn into_result(self, chrom_sizes: Vec<(String, u64)>) -> GenomeCovResult {
+ debug_assert_eq!(self.current_tid, -1, "into_result before finish()");
+ GenomeCovResult {
+ combined: self.combined,
+ forward: self.forward,
+ reverse: self.reverse,
+ chrom_sizes,
+ }
+ }
+}
+
+/// Return a track's bedGraph intervals grouped in `chrom_order` (BAM header)
+/// order. Intervals within each chromosome are already in ascending position
+/// order from the streaming sweep, so a stable sort by chromosome suffices.
+pub fn track_to_bedgraph(track: &TrackAccum, chrom_order: &[(String, u64)]) -> Vec {
+ let order: std::collections::HashMap<&str, usize> = chrom_order
+ .iter()
+ .enumerate()
+ .map(|(i, (n, _))| (n.as_str(), i))
+ .collect();
+
+ let mut entries = track.bedgraph.clone();
+ entries.sort_by_key(|(chrom, _, _, _)| *order.get(chrom.as_str()).unwrap_or(&usize::MAX));
+ entries
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ /// Brute-force reference: per-base coverage from half-open blocks, clamped
+ /// to `size`, converted to merged bedGraph intervals. Obviously correct and
+ /// independent of the streaming implementation.
+ fn brute_force_bedgraph(
+ chrom: &str,
+ size: u32,
+ blocks: &[(u64, u64)],
+ ) -> Vec<(String, u32, u32, u32)> {
+ let mut depth = vec![0u32; size as usize];
+ for &(start, end) in blocks {
+ let end = end.min(size as u64);
+ for p in start..end {
+ if p < size as u64 {
+ depth[p as usize] += 1;
+ }
+ }
+ }
+ let mut out = Vec::new();
+ let mut i = 0usize;
+ while i < depth.len() {
+ if depth[i] == 0 {
+ i += 1;
+ continue;
+ }
+ let d = depth[i];
+ let start = i;
+ while i < depth.len() && depth[i] == d {
+ i += 1;
+ }
+ out.push((chrom.to_string(), start as u32, i as u32, d));
+ }
+ out
+ }
+
+ /// Feed blocks (as one-block "reads", sorted by start) through the streaming
+ /// accumulator and return the combined bedGraph.
+ fn stream_bedgraph(chrom: &str, size: u64, blocks: &[(u64, u64)]) -> Vec {
+ let mut track = TrackAccum::new(TrackSettings::default());
+ track.start_chrom(chrom, size);
+ // Blocks must be applied in ascending start order with the sweep cursor.
+ let mut sorted = blocks.to_vec();
+ sorted.sort_by_key(|&(s, _)| s);
+ for &(s, e) in &sorted {
+ track.add_block(s, e);
+ track.flush_below(s as u32);
+ }
+ track.finish_chrom();
+ track.bedgraph
+ }
+
+ #[test]
+ fn test_simple_overlap() {
+ let blocks = [(2u64, 5u64), (2, 5)];
+ assert_eq!(
+ stream_bedgraph("chr1", 10, &blocks),
+ brute_force_bedgraph("chr1", 10, &blocks)
+ );
+ }
+
+ #[test]
+ fn test_adjacent_and_nested() {
+ let blocks = [(0u64, 10u64), (3, 6), (6, 9), (9, 12)];
+ assert_eq!(
+ stream_bedgraph("chr1", 20, &blocks),
+ brute_force_bedgraph("chr1", 20, &blocks)
+ );
+ }
+
+ #[test]
+ fn test_clamping_past_chrom_end() {
+ let blocks = [(8u64, 15u64), (5, 12)];
+ assert_eq!(
+ stream_bedgraph("chr1", 10, &blocks),
+ brute_force_bedgraph("chr1", 10, &blocks)
+ );
+ }
+
+ #[test]
+ fn test_net_zero_position_merges() {
+ // A block ending exactly where another begins keeps depth constant
+ // across the boundary, so the intervals must merge.
+ let blocks = [(0u64, 5u64), (5, 10)];
+ let got = stream_bedgraph("chr1", 10, &blocks);
+ assert_eq!(got, vec![("chr1".to_string(), 0, 10, 1)]);
+ assert_eq!(got, brute_force_bedgraph("chr1", 10, &blocks));
+ }
+
+ #[test]
+ fn test_randomised_against_brute_force() {
+ // Deterministic LCG so the test is reproducible without rand.
+ let mut state = 0x1234_5678u64;
+ let mut next = || {
+ state = state
+ .wrapping_mul(6364136223846793005)
+ .wrapping_add(1442695040888963407);
+ (state >> 33) as u32
+ };
+ for trial in 0..200 {
+ let size = 50 + (next() % 150);
+ let n = next() % 60;
+ let mut blocks = Vec::new();
+ for _ in 0..n {
+ let start = next() % size;
+ let len = 1 + next() % 20;
+ blocks.push((start as u64, (start + len) as u64));
+ }
+ let stream = stream_bedgraph("c", size as u64, &blocks);
+ let brute = brute_force_bedgraph("c", size, &blocks);
+ assert_eq!(stream, brute, "mismatch on trial {trial}");
+ }
+ }
+
+ #[test]
+ fn test_track_to_bedgraph_orders_by_header() {
+ let mut track = TrackAccum::new(TrackSettings::default());
+ // Emit chr2 before chr1 to exercise reordering.
+ track.bedgraph = vec![("chr2".to_string(), 0, 5, 1), ("chr1".to_string(), 0, 5, 1)];
+ let order = vec![("chr1".to_string(), 100u64), ("chr2".to_string(), 100u64)];
+ let out = track_to_bedgraph(&track, &order);
+ assert_eq!(
+ out,
+ vec![("chr1".to_string(), 0, 5, 1), ("chr2".to_string(), 0, 5, 1),]
+ );
+ }
+}
diff --git a/src/rna/bigwig/mod.rs b/src/rna/bigwig/mod.rs
new file mode 100644
index 0000000..d23dc29
--- /dev/null
+++ b/src/rna/bigwig/mod.rs
@@ -0,0 +1,10 @@
+//! Genome-wide coverage bigWig track generation.
+//!
+//! Reimplements the nf-core/rnaseq bigWig pipeline (`bedtools genomecov` +
+//! UCSC `bedClip` + `bedGraphToBigWig`) in a single streaming pass over the BAM.
+
+pub mod accumulator;
+pub mod output;
+
+pub use accumulator::{GenomeCovAccum, GenomeCovResult};
+pub use output::write_bigwig_tracks;
diff --git a/src/rna/bigwig/output.rs b/src/rna/bigwig/output.rs
new file mode 100644
index 0000000..ab18d1a
--- /dev/null
+++ b/src/rna/bigwig/output.rs
@@ -0,0 +1,128 @@
+//! bigWig output generation from genome coverage accumulators.
+//!
+//! Converts accumulated coverage to bedGraph, clips to chromosome boundaries
+//! (UCSC `bedClip`), and writes bigWig files (via the `bigtools` crate).
+
+use std::collections::HashMap;
+use std::path::Path;
+
+use anyhow::{Context, Result};
+use bigtools::beddata::BedParserStreamingIterator;
+use bigtools::{BigWigWrite, Value};
+
+use super::accumulator::{track_to_bedgraph, GenomeCovResult, TrackAccum};
+
+/// Write all bigWig coverage tracks for a sample.
+///
+/// Produces nf-core/rnaseq-compatible filenames:
+/// - `{sample}.bigWig` — combined (always)
+/// - `{sample}.forward.bigWig` — forward strand (stranded libraries)
+/// - `{sample}.reverse.bigWig` — reverse strand (stranded libraries)
+pub fn write_bigwig_tracks(
+ outdir: &Path,
+ sample_name: &str,
+ result: &GenomeCovResult,
+ chrom_sizes: &[(String, u64)],
+ threads: usize,
+) -> Result> {
+ std::fs::create_dir_all(outdir).with_context(|| {
+ format!(
+ "Failed to create bigwig output directory: {}",
+ outdir.display()
+ )
+ })?;
+
+ let mut written = Vec::new();
+
+ let combined_path = outdir.join(format!("{sample_name}.bigWig"));
+ if write_track_bigwig(&combined_path, &result.combined, chrom_sizes, threads)? {
+ written.push((
+ "bigWig (combined)".into(),
+ combined_path.display().to_string(),
+ ));
+ }
+
+ if let Some(ref forward) = result.forward {
+ let path = outdir.join(format!("{sample_name}.forward.bigWig"));
+ if write_track_bigwig(&path, forward, chrom_sizes, threads)? {
+ written.push(("bigWig (forward)".into(), path.display().to_string()));
+ }
+ }
+
+ if let Some(ref reverse) = result.reverse {
+ let path = outdir.join(format!("{sample_name}.reverse.bigWig"));
+ if write_track_bigwig(&path, reverse, chrom_sizes, threads)? {
+ written.push(("bigWig (reverse)".into(), path.display().to_string()));
+ }
+ }
+
+ Ok(written)
+}
+
+/// Write a single track as a bigWig file.
+///
+/// Skips writing when the track has no coverage intervals (e.g. an empty
+/// per-strand track), matching upstream behaviour where `bedGraphToBigWig`
+/// receives an empty bedGraph. Interval ends are already clamped to each
+/// chromosome's length during accumulation, so no separate `bedClip` step is
+/// needed.
+fn write_track_bigwig(
+ path: &Path,
+ track: &TrackAccum,
+ chrom_sizes: &[(String, u64)],
+ threads: usize,
+) -> Result {
+ let bedgraph = track_to_bedgraph(track, chrom_sizes);
+ if bedgraph.is_empty() {
+ log::debug!(
+ "Skipping empty bigWig track (no coverage intervals): {}",
+ path.display()
+ );
+ return Ok(false);
+ }
+ write_bedgraph_bigwig(path, bedgraph, chrom_sizes, threads)?;
+ Ok(true)
+}
+
+/// Write bedGraph entries to a bigWig file using bigtools.
+fn write_bedgraph_bigwig(
+ path: &Path,
+ entries: Vec<(String, u32, u32, u32)>,
+ chrom_sizes: &[(String, u64)],
+ threads: usize,
+) -> Result<()> {
+ let chrom_map: HashMap = chrom_sizes
+ .iter()
+ .map(|(name, size)| (name.clone(), *size as u32))
+ .collect();
+
+ let iter = entries.into_iter().map(|(chrom, start, end, val)| {
+ (
+ chrom,
+ Value {
+ start,
+ end,
+ value: val as f32,
+ },
+ )
+ });
+ let data = BedParserStreamingIterator::wrap_infallible_iter(iter, true);
+
+ let runtime = if threads <= 1 {
+ tokio::runtime::Builder::new_current_thread()
+ .build()
+ .context("Failed to create tokio runtime for bigWig writing")?
+ } else {
+ tokio::runtime::Builder::new_multi_thread()
+ .worker_threads(threads)
+ .build()
+ .context("Failed to create tokio runtime for bigWig writing")?
+ };
+
+ let outb = BigWigWrite::create_file(path.to_string_lossy().to_string(), chrom_map)
+ .context("Failed to create bigWig writer")?;
+ outb.write(data, runtime)
+ .map_err(|e| anyhow::anyhow!("Failed to write bigWig to {}: {e}", path.display()))?;
+
+ Ok(())
+}
diff --git a/src/rna/dupradar/counting.rs b/src/rna/dupradar/counting.rs
index 785115c..98136b7 100644
--- a/src/rna/dupradar/counting.rs
+++ b/src/rna/dupradar/counting.rs
@@ -11,6 +11,7 @@
use crate::gtf::Gene;
use crate::io::format_count;
+use crate::rna::bigwig::GenomeCovAccum;
use crate::rna::qualimap::QualimapAccum;
use crate::rna::rseqc::accumulators::{RseqcAccumulators, RseqcAnnotations, RseqcConfig};
use crate::Strandedness;
@@ -299,6 +300,8 @@ pub struct CountResult {
/// Qualimap RNA-Seq QC results (if enabled).
#[allow(dead_code)]
pub qualimap: Option,
+ /// Genome coverage for bigWig tracks (if enabled).
+ pub genomecov: Option,
}
/// Metadata stored with each interval in the cache-oblivious interval tree.
@@ -542,6 +545,9 @@ struct ChromResult {
/// Qualimap RNA-Seq QC accumulator (if enabled).
qualimap: Option,
+
+ /// Genome coverage accumulator for bigWig tracks (if enabled).
+ genomecov: Option,
}
impl ChromResult {
@@ -575,6 +581,7 @@ impl ChromResult {
fc_biotype_ambiguous: 0,
fc_biotype_no_features: 0,
qualimap: None,
+ genomecov: None,
}
}
@@ -666,6 +673,14 @@ impl ChromResult {
self.qualimap = Some(other_qm);
}
}
+ // Merge genome coverage accumulators
+ if let Some(other_gc) = other.genomecov {
+ if let Some(ref mut self_gc) = self.genomecov {
+ self_gc.merge(other_gc);
+ } else {
+ self.genomecov = Some(other_gc);
+ }
+ }
}
}
@@ -979,6 +994,8 @@ fn process_chromosome_batch(
rseqc_annotations: Option<&RseqcAnnotations>,
htslib_threads: usize,
qualimap_index: Option<&crate::rna::qualimap::QualimapIndex>,
+ genomecov_enabled: bool,
+ tid_to_len: &[u64],
gene_to_biotype: &[u16],
num_biotypes: usize,
progress: Option<&ProgressBar>,
@@ -987,6 +1004,9 @@ fn process_chromosome_batch(
if qualimap_index.is_some() {
result.qualimap = Some(crate::rna::qualimap::QualimapAccum::new(stranded));
}
+ if genomecov_enabled {
+ result.genomecov = Some(GenomeCovAccum::new(stranded));
+ }
let mut rseqc_accums = rseqc_config.map(|cfg| RseqcAccumulators::new(cfg, rseqc_annotations));
// Pre-compute resolved chromosome names per TID (apply prefix/mapping once,
@@ -1074,6 +1094,16 @@ fn process_chromosome_batch(
}
}
+ // --- bigWig genome coverage (bedtools genomecov equivalent) ---
+ // Only skips unmapped reads; uses BAM reference names.
+ if let Some(ref mut gc) = result.genomecov {
+ let tid = record.tid();
+ if tid >= 0 && (tid as usize) < tid_to_name.len() {
+ let tid = tid as usize;
+ gc.process_read(&record, tid as u32, &tid_to_name[tid], tid_to_len[tid]);
+ }
+ }
+
// Resolve chromosome for gene counting
let rec_tid = record.tid();
let chrom = if rec_tid >= 0 && (rec_tid as usize) < tid_to_gtf_chrom.len() {
@@ -1098,6 +1128,12 @@ fn process_chromosome_batch(
}
}
+ // Finalize the active chromosome's coverage so the accumulator holds only
+ // sparse bedGraph intervals (no live per-base state) before merging.
+ if let Some(ref mut gc) = result.genomecov {
+ gc.finish();
+ }
+
// Move unmatched mates into the result for cross-chromosome reconciliation
result.unmatched_mates = mate_buffer;
Ok((result, rseqc_accums))
@@ -1152,6 +1188,7 @@ pub fn count_reads(
rseqc_config: Option<&RseqcConfig>,
rseqc_annotations: Option<&RseqcAnnotations>,
qualimap_index: Option<&crate::rna::qualimap::QualimapIndex>,
+ genomecov_enabled: bool,
progress: Option<&ProgressBar>,
) -> Result {
// Build gene ID interner for allocation-free lookups in the hot loop
@@ -1300,6 +1337,8 @@ pub fn count_reads(
rseqc_annotations,
htslib_threads,
qualimap_index,
+ genomecov_enabled,
+ &tid_to_len,
&gene_to_biotype,
num_biotypes,
progress,
@@ -1332,6 +1371,11 @@ pub fn count_reads(
rseqc_config.map(|cfg| RseqcAccumulators::new(cfg, rseqc_annotations));
let mut qualimap_accum: Option =
qualimap_index.map(|_| crate::rna::qualimap::QualimapAccum::new(stranded));
+ let mut genomecov_accum = if genomecov_enabled {
+ Some(GenomeCovAccum::new(stranded))
+ } else {
+ None
+ };
// Pre-compute resolved chromosome names for RSeQC tools
// (apply chromosome prefix and mapping, same as parallel path)
@@ -1417,6 +1461,15 @@ pub fn count_reads(
}
}
+ // --- bigWig genome coverage (bedtools genomecov equivalent) ---
+ if let Some(ref mut gc) = genomecov_accum {
+ let tid = record.tid();
+ if tid >= 0 && (tid as usize) < tid_to_name.len() {
+ let tid = tid as usize;
+ gc.process_read(&record, tid as u32, &tid_to_name[tid], tid_to_len[tid]);
+ }
+ }
+
// Resolve chromosome for gene counting
let tid = record.tid();
let chrom = if tid >= 0 && (tid as usize) < tid_to_rseqc_chrom.len() {
@@ -1442,6 +1495,12 @@ pub fn count_reads(
result.unmatched_mates = mate_buffer;
result.qualimap = qualimap_accum;
+ // Finalize the active chromosome's coverage before handing the
+ // accumulator on (leaves only sparse bedGraph intervals).
+ if let Some(ref mut gc) = genomecov_accum {
+ gc.finish();
+ }
+ result.genomecov = genomecov_accum;
(result, rseqc_accums)
};
@@ -1715,6 +1774,14 @@ pub fn count_reads(
}
_ => None,
},
+ genomecov: merged.genomecov.map(|acc| {
+ let chrom_sizes: Vec<(String, u64)> = tid_to_name
+ .iter()
+ .cloned()
+ .zip(tid_to_len.iter().copied())
+ .collect();
+ acc.into_result(chrom_sizes)
+ }),
})
}
diff --git a/src/rna/featurecounts/output.rs b/src/rna/featurecounts/output.rs
index 424c1a7..c1ee2d2 100644
--- a/src/rna/featurecounts/output.rs
+++ b/src/rna/featurecounts/output.rs
@@ -375,6 +375,7 @@ mod tests {
fc_biotype_no_features: 10,
rseqc: None,
qualimap: None,
+ genomecov: None,
};
let biotypes = aggregate_biotype_counts(&count_result);
diff --git a/src/rna/mod.rs b/src/rna/mod.rs
index 7dbcc09..99bb2bd 100644
--- a/src/rna/mod.rs
+++ b/src/rna/mod.rs
@@ -4,6 +4,7 @@
//! and RSeQC tool reimplementations.
pub mod bam_flags;
+pub mod bigwig;
pub mod cpp_rng;
pub mod dupradar;
pub mod featurecounts;
diff --git a/tests/integration_test.rs b/tests/integration_test.rs
index c7ea3ac..30f988b 100644
--- a/tests/integration_test.rs
+++ b/tests/integration_test.rs
@@ -1026,3 +1026,471 @@ fn test_dup_check_parallel_uses_global_duplicate_state() {
let _ = fs::remove_dir_all(root);
}
+
+// ============================================================================
+// bigWig coverage track tests (bedtools genomecov + UCSC bedGraphToBigWig)
+// ============================================================================
+
+/// Parse a 4-column bedGraph file into tuples.
+fn read_bedgraph(path: &Path) -> Vec<(String, u32, u32, u32)> {
+ fs::read_to_string(path)
+ .unwrap_or_else(|e| panic!("Failed to read bedGraph {}: {e}", path.display()))
+ .lines()
+ .filter(|l| !l.is_empty())
+ .map(|line| {
+ let parts: Vec<&str> = line.split('\t').collect();
+ (
+ parts[0].to_string(),
+ parts[1].parse().unwrap(),
+ parts[2].parse().unwrap(),
+ parts[3].parse::().unwrap() as u32,
+ )
+ })
+ .collect()
+}
+
+fn which_bedtools() -> Option {
+ for candidate in ["/tmp/bedtools2/bin/bedtools", "bedtools"] {
+ if Command::new(candidate)
+ .arg("--version")
+ .stdout(std::process::Stdio::null())
+ .stderr(std::process::Stdio::null())
+ .status()
+ .map(|s| s.success())
+ .unwrap_or(false)
+ {
+ return Some(candidate.to_string());
+ }
+ }
+ None
+}
+
+fn which_ucsc_tool(name: &str) -> Option {
+ [format!("/tmp/{name}"), name.to_string()]
+ .into_iter()
+ .find(|candidate| Path::new(candidate).exists())
+}
+
+fn bigwig_to_bedgraph(bw: &Path, out: &Path) -> bool {
+ let bgtobg = match which_ucsc_tool("bigWigToBedGraph") {
+ Some(p) => p,
+ None => return false,
+ };
+ Command::new(&bgtobg)
+ .args([bw.to_str().unwrap(), out.to_str().unwrap()])
+ .status()
+ .map(|s| s.success())
+ .unwrap_or(false)
+}
+
+fn write_test_chrom_sizes(path: &Path) {
+ use rust_htslib::bam::Read;
+ let r = rust_htslib::bam::Reader::from_path("tests/data/test.bam").unwrap();
+ let h = r.header();
+ let mut sizes = String::new();
+ for tid in 0..h.target_count() {
+ let name = String::from_utf8_lossy(h.tid2name(tid as u32));
+ let len = h.target_len(tid as u32).unwrap_or(0);
+ sizes.push_str(&format!("{name}\t{len}\n"));
+ }
+ fs::write(path, sizes).unwrap();
+}
+
+fn sort_bedgraph_file(path: &Path) {
+ let sorted = fs::read_to_string(path).unwrap();
+ let mut lines: Vec = sorted.lines().map(String::from).collect();
+ lines.sort_by(|a, b| {
+ let pa: Vec<&str> = a.split('\t').collect();
+ let pb: Vec<&str> = b.split('\t').collect();
+ pa[0].cmp(pb[0]).then_with(|| {
+ pa[1]
+ .parse::()
+ .unwrap()
+ .cmp(&pb[1].parse::().unwrap())
+ })
+ });
+ fs::write(path, lines.join("\n") + "\n").unwrap();
+}
+
+#[test]
+fn test_bigwig_combined_matches_bedtools_genomecov() {
+ if which_bedtools().is_none() || which_ucsc_tool("bigWigToBedGraph").is_none() {
+ eprintln!("Skipping bigWig cross-check: upstream tools not found");
+ return;
+ }
+
+ let root = unique_test_dir("bigwig");
+ fs::create_dir_all(&root).unwrap();
+ let chrom_sizes_path = root.join("chrom.sizes");
+ write_test_chrom_sizes(&chrom_sizes_path);
+
+ let bedtools = which_bedtools().unwrap();
+ let bg = root.join("combined.bedGraph");
+ let clip = root.join("combined.clip.bedGraph");
+ let ref_bw = root.join("ref.bigWig");
+
+ let status = Command::new(&bedtools)
+ .args(["genomecov", "-ibam", "tests/data/test.bam", "-split", "-bg"])
+ .stdout(std::process::Stdio::from(
+ std::fs::File::create(&bg).expect("create bedGraph"),
+ ))
+ .status()
+ .expect("bedtools");
+ assert!(status.success());
+ sort_bedgraph_file(&bg);
+
+ Command::new(which_ucsc_tool("bedClip").unwrap())
+ .args([
+ bg.to_str().unwrap(),
+ chrom_sizes_path.to_str().unwrap(),
+ clip.to_str().unwrap(),
+ ])
+ .status()
+ .unwrap();
+ Command::new(which_ucsc_tool("bedGraphToBigWig").unwrap())
+ .args([
+ clip.to_str().unwrap(),
+ chrom_sizes_path.to_str().unwrap(),
+ ref_bw.to_str().unwrap(),
+ ])
+ .status()
+ .unwrap();
+
+ let outdir = root.join("rustqc");
+ let output = run_rustqc_impl(outdir.to_str().unwrap(), "tests/data/test.bam", true);
+ assert!(
+ output.status.success(),
+ "rustqc failed:\n{}",
+ String::from_utf8_lossy(&output.stderr)
+ );
+
+ let rust_bw = outdir.join("bigwig/test.bigWig");
+ assert!(rust_bw.exists(), "RustQC bigWig missing");
+
+ let ref_bg = root.join("ref.bg");
+ let rust_bg = root.join("rust.bg");
+ assert!(bigwig_to_bedgraph(&ref_bw, &ref_bg));
+ assert!(bigwig_to_bedgraph(&rust_bw, &rust_bg));
+ assert_eq!(read_bedgraph(&ref_bg), read_bedgraph(&rust_bg));
+
+ let _ = fs::remove_dir_all(root);
+}
+
+#[test]
+fn test_bigwig_stranded_forward_matches_bedtools() {
+ if which_bedtools().is_none() || which_ucsc_tool("bigWigToBedGraph").is_none() {
+ eprintln!("Skipping stranded bigWig cross-check: upstream tools not found");
+ return;
+ }
+
+ let root = unique_test_dir("bigwig-stranded");
+ fs::create_dir_all(&root).unwrap();
+ let chrom_sizes_path = root.join("chrom.sizes");
+ write_test_chrom_sizes(&chrom_sizes_path);
+
+ let bedtools = which_bedtools().unwrap();
+ let bg = root.join("forward.bedGraph");
+ let clip = root.join("forward.clip.bedGraph");
+ let ref_bw = root.join("ref.forward.bigWig");
+
+ let status = Command::new(&bedtools)
+ .args([
+ "genomecov",
+ "-ibam",
+ "tests/data/test.bam",
+ "-split",
+ "-du",
+ "-strand",
+ "+",
+ "-bg",
+ ])
+ .stdout(std::process::Stdio::from(
+ std::fs::File::create(&bg).expect("create bedGraph"),
+ ))
+ .status()
+ .expect("bedtools");
+ assert!(status.success());
+ sort_bedgraph_file(&bg);
+
+ Command::new(which_ucsc_tool("bedClip").unwrap())
+ .args([
+ bg.to_str().unwrap(),
+ chrom_sizes_path.to_str().unwrap(),
+ clip.to_str().unwrap(),
+ ])
+ .status()
+ .unwrap();
+ Command::new(which_ucsc_tool("bedGraphToBigWig").unwrap())
+ .args([
+ clip.to_str().unwrap(),
+ chrom_sizes_path.to_str().unwrap(),
+ ref_bw.to_str().unwrap(),
+ ])
+ .status()
+ .unwrap();
+
+ let outdir = root.join("rustqc");
+ let binary = rustqc_binary();
+ let output = Command::new(&binary)
+ .args([
+ "rna",
+ "tests/data/test.bam",
+ "--gtf",
+ "tests/data/test.gtf",
+ "--outdir",
+ outdir.to_str().unwrap(),
+ "--stranded",
+ "forward",
+ "--skip-dup-check",
+ ])
+ .output()
+ .expect("rustqc");
+ assert!(
+ output.status.success(),
+ "rustqc failed:\n{}",
+ String::from_utf8_lossy(&output.stderr)
+ );
+
+ let rust_bw = outdir.join("bigwig/test.forward.bigWig");
+ assert!(rust_bw.exists());
+
+ let ref_bg = root.join("ref.bg");
+ let rust_bg = root.join("rust.bg");
+ assert!(bigwig_to_bedgraph(&ref_bw, &ref_bg));
+ assert!(bigwig_to_bedgraph(&rust_bw, &rust_bg));
+ assert_eq!(read_bedgraph(&ref_bg), read_bedgraph(&rust_bg));
+
+ let _ = fs::remove_dir_all(root);
+}
+
+/// Build a synthetic BAM exercising every bigWig strand/CIGAR/flag edge case,
+/// and confirm the combined and per-strand tracks match `bedtools genomecov`
+/// exactly. The reads deliberately cover:
+///
+/// - read-1 forward/reverse (never strand-flipped under `-du`)
+/// - read-2 forward/reverse with mate **mapped** (flipped under `-du`)
+/// - read-2 forward/reverse with mate **unmapped** (NOT flipped — the subtle
+/// case: bedtools only flips when the mate is mapped)
+/// - read-1 with an unmapped mate, and single-end reads (no flip)
+/// - spliced (`N`) and deletion (`D`) CIGARs (`-split` block boundaries)
+/// - a read overhanging the chromosome end (clamping)
+/// - secondary, duplicate, QC-fail and supplementary alignments (all counted)
+/// - a fully unmapped read (ignored)
+/// - a second chromosome (exercises the parallel per-chromosome merge)
+///
+/// Run at one and four threads to cover the sequential and parallel paths.
+#[test]
+fn test_bigwig_strand_edge_cases_match_bedtools() {
+ if which_bedtools().is_none()
+ || which_ucsc_tool("bigWigToBedGraph").is_none()
+ || which_ucsc_tool("bedClip").is_none()
+ || which_ucsc_tool("bedGraphToBigWig").is_none()
+ {
+ eprintln!("Skipping bigWig strand edge-case cross-check: upstream tools not found");
+ return;
+ }
+
+ let root = unique_test_dir("bigwig-strand-edge");
+ let bam = root.join("strand.bam");
+ let gtf = root.join("strand.gtf");
+ let chrom_sizes = root.join("chrom.sizes");
+
+ // SEQ/QUAL are 10 bases for every record (query length is 10 for 10M,
+ // 5M5N5M and 5M2D5M alike), so a single constant pair suffices.
+ write_bam_fixture(
+ &bam,
+ &[("chrTest", 300), ("chrTest2", 200)],
+ &[
+ // chrTest — coordinate-sorted
+ "r1_fwd_mm\t65\tchrTest\t11\t60\t10M\t*\t0\t0\tAAAAAAAAAA\tIIIIIIIIII",
+ "r1_rev_mm\t81\tchrTest\t31\t60\t10M\t*\t0\t0\tAAAAAAAAAA\tIIIIIIIIII",
+ "r2_fwd_mm\t129\tchrTest\t51\t60\t10M\t*\t0\t0\tAAAAAAAAAA\tIIIIIIIIII",
+ "r2_rev_mm\t145\tchrTest\t71\t60\t10M\t*\t0\t0\tAAAAAAAAAA\tIIIIIIIIII",
+ "r2_fwd_mu\t137\tchrTest\t91\t60\t10M\t*\t0\t0\tAAAAAAAAAA\tIIIIIIIIII",
+ "r2_rev_mu\t153\tchrTest\t111\t60\t10M\t*\t0\t0\tAAAAAAAAAA\tIIIIIIIIII",
+ "r1_fwd_mu\t73\tchrTest\t131\t60\t10M\t*\t0\t0\tAAAAAAAAAA\tIIIIIIIIII",
+ "se_fwd\t0\tchrTest\t151\t60\t10M\t*\t0\t0\tAAAAAAAAAA\tIIIIIIIIII",
+ "secondary_fwd\t256\tchrTest\t151\t60\t10M\t*\t0\t0\tAAAAAAAAAA\tIIIIIIIIII",
+ "se_rev\t16\tchrTest\t171\t60\t10M\t*\t0\t0\tAAAAAAAAAA\tIIIIIIIIII",
+ "spliced_fwd\t65\tchrTest\t191\t60\t5M5N5M\t*\t0\t0\tAAAAAAAAAA\tIIIIIIIIII",
+ "del_fwd\t0\tchrTest\t211\t60\t5M2D5M\t*\t0\t0\tAAAAAAAAAA\tIIIIIIIIII",
+ "dup_fwd\t1024\tchrTest\t231\t60\t10M\t*\t0\t0\tAAAAAAAAAA\tIIIIIIIIII",
+ "qcfail_fwd\t512\tchrTest\t241\t60\t10M\t*\t0\t0\tAAAAAAAAAA\tIIIIIIIIII",
+ "supp_fwd\t2048\tchrTest\t251\t60\t10M\t*\t0\t0\tAAAAAAAAAA\tIIIIIIIIII",
+ "clipend_fwd\t0\tchrTest\t295\t60\t10M\t*\t0\t0\tAAAAAAAAAA\tIIIIIIIIII",
+ // chrTest2
+ "t2_fwd\t0\tchrTest2\t11\t60\t10M\t*\t0\t0\tAAAAAAAAAA\tIIIIIIIIII",
+ "t2_rev\t16\tchrTest2\t51\t60\t10M\t*\t0\t0\tAAAAAAAAAA\tIIIIIIIIII",
+ // unmapped read (sorted last) — must be ignored by both tools
+ "unmapped\t4\t*\t0\t0\t*\t*\t0\t0\tAAAAAAAAAA\tIIIIIIIIII",
+ ],
+ );
+ write_test_gtf(>f, &["chrTest", "chrTest2"]);
+ fs::write(&chrom_sizes, "chrTest\t300\nchrTest2\t200\n").unwrap();
+
+ let bedtools = which_bedtools().unwrap();
+ let binary = rustqc_binary();
+
+ // (track filename suffix, bedtools genomecov strand args)
+ let tracks: [(&str, &[&str]); 3] = [
+ ("strand.bigWig", &["-split"]),
+ ("strand.forward.bigWig", &["-split", "-du", "-strand", "+"]),
+ ("strand.reverse.bigWig", &["-split", "-du", "-strand", "-"]),
+ ];
+
+ for threads in ["1", "4"] {
+ let outdir = root.join(format!("out_t{threads}"));
+ let output = Command::new(&binary)
+ .args([
+ "rna",
+ bam.to_str().unwrap(),
+ "--gtf",
+ gtf.to_str().unwrap(),
+ "--outdir",
+ outdir.to_str().unwrap(),
+ "--stranded",
+ "forward",
+ "--threads",
+ threads,
+ "--skip-dup-check",
+ ])
+ .output()
+ .expect("rustqc");
+ assert!(
+ output.status.success(),
+ "rustqc failed (threads={threads}):\n{}",
+ String::from_utf8_lossy(&output.stderr)
+ );
+
+ for (suffix, strand_args) in &tracks {
+ let label = format!("threads={threads} track={suffix}");
+
+ // Reference: bedtools genomecov -> bedClip -> bedGraphToBigWig.
+ let raw = root.join(format!("ref.{threads}.{suffix}.bg"));
+ let mut args: Vec<&str> = vec!["genomecov", "-ibam", bam.to_str().unwrap()];
+ args.extend_from_slice(strand_args);
+ args.push("-bg");
+ let status = Command::new(&bedtools)
+ .args(&args)
+ .stdout(std::process::Stdio::from(
+ std::fs::File::create(&raw).expect("create bedGraph"),
+ ))
+ .status()
+ .expect("bedtools");
+ assert!(status.success(), "bedtools failed for {label}");
+ sort_bedgraph_file(&raw);
+
+ let clip = root.join(format!("ref.{threads}.{suffix}.clip.bg"));
+ Command::new(which_ucsc_tool("bedClip").unwrap())
+ .args([
+ raw.to_str().unwrap(),
+ chrom_sizes.to_str().unwrap(),
+ clip.to_str().unwrap(),
+ ])
+ .status()
+ .unwrap();
+ let ref_bw = root.join(format!("ref.{threads}.{suffix}.bw"));
+ Command::new(which_ucsc_tool("bedGraphToBigWig").unwrap())
+ .args([
+ clip.to_str().unwrap(),
+ chrom_sizes.to_str().unwrap(),
+ ref_bw.to_str().unwrap(),
+ ])
+ .status()
+ .unwrap();
+
+ // bedtools emits no per-strand track when there is zero coverage;
+ // RustQC likewise skips the file. Treat "no reference intervals"
+ // and "no RustQC file" as consistent.
+ let ref_bg = root.join(format!("ref.{threads}.{suffix}.dec.bg"));
+ let has_ref =
+ bigwig_to_bedgraph(&ref_bw, &ref_bg) && !read_bedgraph(&ref_bg).is_empty();
+
+ if !has_ref {
+ continue;
+ }
+ let rust_bw = outdir.join(format!("bigwig/{suffix}"));
+ assert!(rust_bw.exists(), "missing RustQC bigWig for {label}");
+ let rust_bg = root.join(format!("rust.{threads}.{suffix}.dec.bg"));
+ assert!(
+ bigwig_to_bedgraph(&rust_bw, &rust_bg),
+ "decode failed {label}"
+ );
+ assert_eq!(
+ read_bedgraph(&ref_bg),
+ read_bedgraph(&rust_bg),
+ "bedGraph mismatch for {label}"
+ );
+ }
+ }
+
+ let _ = fs::remove_dir_all(root);
+}
+
+/// Per-strand bigWig tracks are only produced when meaningful, and empty
+/// tracks are skipped rather than written as stubs. Uses a forward-only BAM:
+///
+/// - unstranded run -> only the combined `{sample}.bigWig`
+/// - `--stranded forward` run -> combined + forward present, but the reverse
+/// track has zero coverage and so its file is omitted
+///
+/// Tool-independent: asserts file presence/absence, not coverage values.
+#[test]
+fn test_bigwig_per_strand_track_presence() {
+ let root = unique_test_dir("bigwig-presence");
+ let bam = root.join("fwdonly.bam");
+ let gtf = root.join("fwdonly.gtf");
+
+ // Two forward single-end reads (FLAG 0): all coverage is on the + strand.
+ write_bam_fixture(
+ &bam,
+ &[("chrP", 1000)],
+ &[
+ "f1\t0\tchrP\t11\t60\t10M\t*\t0\t0\tAAAAAAAAAA\tIIIIIIIIII",
+ "f2\t0\tchrP\t101\t60\t10M\t*\t0\t0\tAAAAAAAAAA\tIIIIIIIIII",
+ ],
+ );
+ write_test_gtf(>f, &["chrP"]);
+ let binary = rustqc_binary();
+
+ let run = |outdir: &Path, stranded: Option<&str>| {
+ let mut args = vec![
+ "rna",
+ bam.to_str().unwrap(),
+ "--gtf",
+ gtf.to_str().unwrap(),
+ "--outdir",
+ outdir.to_str().unwrap(),
+ "--skip-dup-check",
+ ];
+ if let Some(s) = stranded {
+ args.push("--stranded");
+ args.push(s);
+ }
+ let output = Command::new(&binary).args(&args).output().expect("rustqc");
+ assert!(
+ output.status.success(),
+ "rustqc failed:\n{}",
+ String::from_utf8_lossy(&output.stderr)
+ );
+ };
+
+ // Unstranded: combined only, no per-strand tracks.
+ let unstranded = root.join("unstranded");
+ run(&unstranded, None);
+ assert!(unstranded.join("bigwig/fwdonly.bigWig").exists());
+ assert!(!unstranded.join("bigwig/fwdonly.forward.bigWig").exists());
+ assert!(!unstranded.join("bigwig/fwdonly.reverse.bigWig").exists());
+
+ // Stranded forward: forward populated, reverse empty -> file skipped.
+ let forward = root.join("forward");
+ run(&forward, Some("forward"));
+ assert!(forward.join("bigwig/fwdonly.bigWig").exists());
+ assert!(forward.join("bigwig/fwdonly.forward.bigWig").exists());
+ assert!(
+ !forward.join("bigwig/fwdonly.reverse.bigWig").exists(),
+ "empty reverse track should be skipped, not written as a stub"
+ );
+
+ let _ = fs::remove_dir_all(root);
+}