diff --git a/.cargo/audit.toml b/.cargo/audit.toml index 32fc092..7dc4bbc 100644 --- a/.cargo/audit.toml +++ b/.cargo/audit.toml @@ -2,8 +2,4 @@ # https://github.com/rustsec/rustsec/tree/main/cargo-audit [advisories] -# custom_derive v0.1.7 — unmaintained crate pulled in transitively by -# rust-htslib v1.0.0. There is no newer rust-htslib release that drops it, -# and this is an "unmaintained" notice, not a vulnerability. -# Re-evaluate when rust-htslib releases a version without custom_derive. -ignore = ["RUSTSEC-2025-0058"] +ignore = [] diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e33782d..d7849f1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,6 +10,7 @@ permissions: env: CARGO_TERM_COLOR: always + CXX: g++ jobs: test: @@ -40,12 +41,11 @@ jobs: if: runner.os == 'Linux' run: | sudo apt-get update - sudo apt-get install -y cmake zlib1g-dev libbz2-dev liblzma-dev \ - libcurl4-openssl-dev libssl-dev libfontconfig1-dev pkg-config clang + sudo apt-get install -y g++ libfontconfig1-dev pkg-config - name: Install macOS build deps if: runner.os == 'macOS' - run: brew install bzip2 xz + run: brew install fontconfig - uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # ratchet:Swatinem/rust-cache@v2.9.1 @@ -97,8 +97,7 @@ jobs: - name: Install Linux build deps run: | sudo apt-get update - sudo apt-get install -y cmake zlib1g-dev libbz2-dev liblzma-dev \ - libcurl4-openssl-dev libssl-dev libfontconfig1-dev pkg-config clang + sudo apt-get install -y g++ libfontconfig1-dev pkg-config - uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # ratchet:Swatinem/rust-cache@v2.9.1 @@ -115,13 +114,12 @@ jobs: - name: Install Rust MSRV toolchain uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # v1 with: - toolchain: "1.87" + toolchain: "1.89" - name: Install Linux build deps run: | sudo apt-get update - sudo apt-get install -y cmake zlib1g-dev libbz2-dev liblzma-dev \ - 
libcurl4-openssl-dev libssl-dev libfontconfig1-dev pkg-config clang + sudo apt-get install -y g++ libfontconfig1-dev pkg-config - uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # ratchet:Swatinem/rust-cache@v2.9.1 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2f404bb..72b2700 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -12,6 +12,7 @@ permissions: env: CARGO_TERM_COLOR: always + CXX: g++ IMAGE_NAME: ${{ github.repository }} jobs: @@ -134,13 +135,12 @@ jobs: if: runner.os == 'Linux' run: | sudo apt-get update - sudo apt-get install -y cmake zlib1g-dev libbz2-dev liblzma-dev \ - libcurl4-openssl-dev libssl-dev libfontconfig1-dev pkg-config clang + sudo apt-get install -y g++ libfontconfig1-dev pkg-config # macOS build dependencies - name: Install macOS build deps if: runner.os == 'macOS' - run: brew install bzip2 xz + run: brew install fontconfig - name: Build run: > diff --git a/AGENTS.md b/AGENTS.md index 108414a..78f3420 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -244,7 +244,7 @@ To prepare a release: | Crate | Purpose | | ---------------------- | ------------------------------------- | | `clap` v4 | CLI argument parsing (derive) | -| `rust-htslib` | BAM file I/O (statically linked) | +| `noodles` | BAM/SAM/CRAM file I/O (pure Rust) | | `plotters` | Chart generation (PNG + SVG) | | `serde` | YAML config deserialization | | `anyhow` | Error handling | @@ -282,7 +282,7 @@ forwarded to `count_reads()` as the `skip_dup_check: bool` parameter). - Test data is generated by `tests/create_test_data.R` — do not modify `tests/expected/` by hand. - Float output formatting must match R's behavior (15 significant digits, "NA" for NaN, trailing-zero trimming). - The pipeline processes BAM files which can be very large — performance matters. -- System dependencies needed for building: cmake, zlib, bz2, lzma, curl, ssl, clang (for `rust-htslib`). 
+- System dependencies needed for building: a C++ compiler (for the preseq RNG FFI shim), libfontconfig (for plot rendering). noodles handles BAM/SAM/CRAM I/O in pure Rust with no htslib/cmake dependency. - Benchmark results are produced by the [RustQC-benchmarks](https://github.com/seqeralabs/RustQC-benchmarks) Nextflow pipeline. When benchmarks are re-run, verify that all results referenced in the docs and the top-level `README.md` are updated to reflect the new numbers. diff --git a/CHANGELOG.md b/CHANGELOG.md index 2b67cce..b2c3552 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # RustQC Changelog +## [Unreleased] + +### Other changes + +- Migrate BAM/SAM/CRAM I/O from rust-htslib to pure-Rust [noodles](https://crates.io/crates/noodles) (#113) +- Simplify build requirements: no cmake or htslib system libraries needed +- Bump MSRV to Rust 1.89 (required by noodles 0.111) + ## [Version 0.2.1](https://github.com/seqeralabs/RustQC/releases/tag/v0.2.1) - 2026-04-09 ### Bug fixes diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 181de96..337c500 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -6,24 +6,18 @@ Thanks for your interest in contributing to RustQC! This document covers how to ### Prerequisites -You need a working Rust toolchain (stable) and the following system libraries for building `rust-htslib`: - -- cmake, clang -- zlib, libbz2, liblzma -- libcurl, libssl -- libfontconfig (for plot rendering) +You need a working Rust toolchain (stable), a C++ compiler (for the preseq RNG FFI shim), and libfontconfig (for plot rendering). BAM/SAM/CRAM I/O is handled by the pure-Rust [noodles](https://crates.io/crates/noodles) crate — no htslib or cmake required. 
On Ubuntu/Debian: ```bash -sudo apt-get install cmake clang zlib1g-dev libbz2-dev liblzma-dev \ - libcurl4-openssl-dev libssl-dev libfontconfig1-dev +sudo apt-get install g++ libfontconfig1-dev ``` On macOS (with Homebrew): ```bash -brew install cmake xz +brew install fontconfig ``` ### Building diff --git a/Cargo.lock b/Cargo.lock index da20215..c77b341 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "aliasable" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd" + [[package]] name = "android_system_properties" version = "0.1.5" @@ -89,36 +95,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] -name = "bindgen" -version = "0.69.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" -dependencies = [ - "bitflags 2.11.0", - "cexpr", - "clang-sys", - "itertools", - "lazy_static", - "lazycell", - "proc-macro2", - "quote", - "regex", - "rustc-hash", - "shlex", - "syn", -] - -[[package]] -name = "bio-types" -version = "1.0.4" +name = "bit-vec" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4dcf54f8b7f51450207d54780bab09c05f30b8b0caa991545082842e466ad7e" +checksum = "b71798fca2c1fe1086445a7258a4bc81e6e49dcd24c8d0dd9a1e57395b603f51" dependencies = [ - "derive-new 0.6.0", - "lazy_static", - "regex", - "strum_macros", - "thiserror 1.0.69", + "serde", ] [[package]] @@ -133,6 +115,25 @@ version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" +[[package]] +name = "block-buffer" +version = "0.12.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2f6c7dbe95a6ed67ad9f18e57daf93a2f034c524b99fd2b76d18fdfeb6660aa" +dependencies = [ + "hybrid-array", +] + +[[package]] +name = "bstr" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "bumpalo" version = "3.20.2" @@ -152,13 +153,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] -name = "bzip2-sys" -version = "0.1.13+1.0.8" +name = "bytes" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" + +[[package]] +name = "bzip2" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" +checksum = "f3a53fac24f34a81bc9954b5d6cfce0c21e18ec6959f44f56e8e90e4bb7c346c" dependencies = [ - "cc", - "pkg-config", + "libbz2-rs-sys", ] [[package]] @@ -168,20 +174,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1e928d4b69e3077709075a938a05ffbedfa53a84c8f766efbf8220bb1ff60e1" dependencies = [ "find-msvc-tools", - "jobserver", - "libc", "shlex", ] -[[package]] -name = "cexpr" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" -dependencies = [ - "nom", -] - [[package]] name = "cfg-if" version = "1.0.4" @@ -212,17 +207,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "clang-sys" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" -dependencies = [ - 
"glob", - "libc", - "libloading", -] - [[package]] name = "clap" version = "4.6.0" @@ -251,7 +235,7 @@ version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1110bd8a634a1ab8cb04345d8d878267d57c3cf1b38d91b71af6686408bbca6a" dependencies = [ - "heck", + "heck 0.5.0", "proc-macro2", "quote", "syn", @@ -263,15 +247,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" -[[package]] -name = "cmake" -version = "0.1.58" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678" -dependencies = [ - "cc", -] - [[package]] name = "coitrees" version = "0.4.0" @@ -303,6 +278,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "const-oid" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c" + [[package]] name = "core-foundation" version = "0.9.4" @@ -373,6 +354,15 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -399,46 +389,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] -name = "curl-sys" -version = "0.4.87+curl-8.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61a460380f0ef783703dcbe909107f39c162adeac050d73c850055118b5b6327" -dependencies = [ - "cc", - "libc", - "libz-sys", - "openssl-sys", - "pkg-config", - "vcpkg", - "windows-sys 0.59.0", -] - -[[package]] -name = "custom_derive" 
-version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef8ae57c4978a2acd8b869ce6b9ca1dfe817bff704c220209fdef2c0b75a01b9" - -[[package]] -name = "derive-new" -version = "0.6.0" +name = "crypto-common" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d150dea618e920167e5973d70ae6ece4385b7164e0d799fe7c122dd0a5d912ad" +checksum = "ce6e4c961d6cd6c9a86db418387425e8bdeaf05b3c8bc1411e6dca4c252f1453" dependencies = [ - "proc-macro2", - "quote", - "syn", + "hybrid-array", ] [[package]] -name = "derive-new" -version = "0.7.0" +name = "digest" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cdc8d50f426189eef89dac62fabfa0abb27d5cc008f25bf4156a0203325becc" +checksum = "f1dd6dbb5841937940781866fa1281a1ff7bd3bf827091440879f9994983d5c2" dependencies = [ - "proc-macro2", - "quote", - "syn", + "block-buffer", + "const-oid", + "crypto-common", ] [[package]] @@ -462,17 +429,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "displaydoc" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "dlib" version = "0.5.3" @@ -558,6 +514,7 @@ checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" dependencies = [ "crc32fast", "miniz_oxide", + "zlib-rs", ] [[package]] @@ -624,15 +581,6 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aa9a19cbb55df58761df49b23516a86d432839add4af60fc256da840f66ed35b" -[[package]] -name = "form_urlencoded" -version = "1.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" -dependencies = [ - "percent-encoding", -] - [[package]] name = "freetype-sys" version = 
"0.20.1" @@ -644,15 +592,6 @@ dependencies = [ "pkg-config", ] -[[package]] -name = "fs-utils" -version = "1.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fc7a9dc005c944c98a935e7fd626faf5bf7e5a609f94bc13e42fc4a02e52593" -dependencies = [ - "quick-error", -] - [[package]] name = "getrandom" version = "0.2.17" @@ -664,18 +603,6 @@ dependencies = [ "wasi", ] -[[package]] -name = "getrandom" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" -dependencies = [ - "cfg-if", - "libc", - "r-efi 5.3.0", - "wasip2", -] - [[package]] name = "getrandom" version = "0.4.2" @@ -684,7 +611,7 @@ checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" dependencies = [ "cfg-if", "libc", - "r-efi 6.0.0", + "r-efi", "rand_core", "wasip2", "wasip3", @@ -700,12 +627,6 @@ dependencies = [ "weezl", ] -[[package]] -name = "glob" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" - [[package]] name = "hashbrown" version = "0.15.5" @@ -721,6 +642,12 @@ version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + [[package]] name = "heck" version = "0.5.0" @@ -728,20 +655,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] -name = "hts-sys" -version = "2.2.0" +name = "hybrid-array" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e38d7f1c121cd22aa214cb4dadd4277dc5447391eac518b899b29ba6356fbbb2" +checksum = "9155a582abd142abc056962c29e3ce5ff2ad5469f4246b537ed42c5deba857da" dependencies = [ - "bindgen", - "bzip2-sys", - "cc", - "curl-sys", - "fs-utils", - "glob", - "libz-sys", - "lzma-sys", - "openssl-sys", + "typenum", ] [[package]] @@ -768,120 +687,12 @@ dependencies = [ "cc", ] -[[package]] -name = "icu_collections" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" -dependencies = [ - "displaydoc", - "potential_utf", - "yoke", - "zerofrom", - "zerovec", -] - -[[package]] -name = "icu_locale_core" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" -dependencies = [ - "displaydoc", - "litemap", - "tinystr", - "writeable", - "zerovec", -] - -[[package]] -name = "icu_normalizer" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" -dependencies = [ - "icu_collections", - "icu_normalizer_data", - "icu_properties", - "icu_provider", - "smallvec", - "zerovec", -] - -[[package]] -name = "icu_normalizer_data" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" - -[[package]] -name = "icu_properties" -version = "2.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" -dependencies = [ - "icu_collections", - "icu_locale_core", - "icu_properties_data", - "icu_provider", - "zerotrie", - "zerovec", -] - -[[package]] -name = "icu_properties_data" -version = "2.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" - -[[package]] -name = "icu_provider" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" -dependencies = [ - "displaydoc", - "icu_locale_core", - "writeable", - "yoke", - "zerofrom", - "zerotrie", - "zerovec", -] - [[package]] name = "id-arena" version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" -[[package]] -name = "idna" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" -dependencies = [ - "idna_adapter", - "smallvec", - "utf8_iter", -] - -[[package]] -name = "idna_adapter" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" -dependencies = [ - "icu_normalizer", - "icu_properties", -] - -[[package]] -name = "ieee754" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9007da9cacbd3e6343da136e98b0d2df013f553d35bdec8b518f07bea768e19c" - [[package]] name = "image" version = "0.24.9" @@ -927,15 +738,6 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" -[[package]] -name = "itertools" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" -dependencies = [ - "either", -] - [[package]] name = "itoa" version = "1.0.18" @@ -966,16 +768,6 @@ dependencies = [ "syn", ] -[[package]] -name = "jobserver" -version = "0.1.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" -dependencies = [ - "getrandom 0.3.4", - "libc", -] - [[package]] name = "jpeg-decoder" version = "0.3.2" @@ -998,18 +790,75 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" -[[package]] -name = "lazycell" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" - [[package]] name = "leb128fmt" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" +[[package]] +name = "lexical-core" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d8d125a277f807e55a77304455eb7b1cb52f2b18c143b60e766c120bd64a594" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52a9f232fbd6f550bc0137dcb5f99ab674071ac2d690ac69704593cb4abbea56" +dependencies = [ + "lexical-parse-integer", + "lexical-util", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a7a039f8fb9c19c996cd7b2fcce303c1b2874fe1aca544edc85c4a5f8489b34" +dependencies = [ + "lexical-util", +] + +[[package]] +name = "lexical-util" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2604dd126bb14f13fb5d1bd6a66155079cb9fa655b37f875b3a742c705dbed17" + +[[package]] +name = "lexical-write-float" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"50c438c87c013188d415fbabbb1dceb44249ab81664efbd31b14ae55dabb6361" +dependencies = [ + "lexical-util", + "lexical-write-integer", +] + +[[package]] +name = "lexical-write-integer" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "409851a618475d2d5796377cad353802345cba92c867d9fbcde9cf4eac4e14df" +dependencies = [ + "lexical-util", +] + +[[package]] +name = "libbz2-rs-sys" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34b357333733e8260735ba5894eb928c02ecc69c78715f01a8019e7fa7f2db4c" + [[package]] name = "libc" version = "0.2.184" @@ -1036,86 +885,155 @@ dependencies = [ ] [[package]] -name = "libz-sys" -version = "1.1.25" +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "lzma-rust2" +version = "0.16.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce716bf1a316f47a280fc76295f6495b5bea4752bca01c3b3885e101b1c23c02" +dependencies = [ + "sha2", +] + +[[package]] +name = "md-5" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d52f4c29e2a68ac30c9087e1b772dc9f44a2b66ed44edf2266cf2be9b03dafc1" +checksum = "69b6441f590336821bb897fb28fc622898ccceb1d6cea3fde5ea86b090c4de98" dependencies = [ - "cc", - "cmake", - "libc", - "pkg-config", - "vcpkg", + "cfg-if", + "digest", ] [[package]] -name = "linear-map" -version = "1.2.0" +name = "memchr" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfae20f6b19ad527b550c223fddc3077a547fc70cda94b9b566575423fd303ee" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] -name = "litemap" -version = "0.8.1" +name = "miniz_oxide" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] [[package]] -name = "log" -version = "0.4.29" +name = "noodles" +version = "0.111.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +checksum = "78906b00d2b2d144c920567724ab0dc68ef8da7fc258ef18da86bbbec572000e" +dependencies = [ + "noodles-bam", + "noodles-bgzf", + "noodles-core", + "noodles-cram", + "noodles-csi", + "noodles-fasta", + "noodles-sam", +] [[package]] -name = "lzma-sys" -version = "0.1.20" +name = "noodles-bam" +version = "0.90.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +checksum = "4d319ea3e4414172455eec82f0283ae3fc6a5a8e9b23bdc16ee426986a615094" dependencies = [ - "cc", - "libc", - "pkg-config", + "bstr", + "indexmap", + "memchr", + "noodles-bgzf", + "noodles-core", + "noodles-csi", + "noodles-sam", ] [[package]] -name = "memchr" -version = "2.8.0" +name = "noodles-bgzf" +version = "0.47.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +checksum = "4d22589ec50582fa0c3e629d27e5263fc5ff5d436955648ba601b7ac4155fbf2" +dependencies = [ + "bytes", + "crossbeam-channel", + "zlib-rs", +] [[package]] -name = "minimal-lexical" -version = "0.2.1" +name = "noodles-core" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8dbac7c5f9a7de9fe45590f198a09697df631cd13d2060b4742cc48144555b0" +dependencies = [ + "bstr", +] + +[[package]] +name = "noodles-cram" +version = "0.94.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" +checksum = 
"267b2934c706c2372af2eefde6c936cc5fbf26a899f5844cc48d62ea51cbd933" +dependencies = [ + "bitflags 2.11.0", + "bstr", + "bzip2", + "flate2", + "indexmap", + "lexical-core", + "lzma-rust2", + "md-5", + "noodles-bam", + "noodles-core", + "noodles-fasta", + "noodles-sam", +] [[package]] -name = "miniz_oxide" -version = "0.8.9" +name = "noodles-csi" +version = "0.56.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +checksum = "6832254d731cb022d46927ce64403221b280b17140516cafa21e43ee4140d633" dependencies = [ - "adler2", - "simd-adler32", + "bit-vec", + "bstr", + "indexmap", + "noodles-bgzf", + "noodles-core", ] [[package]] -name = "newtype_derive" -version = "0.1.6" +name = "noodles-fasta" +version = "0.62.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac8cd24d9f185bb7223958d8c1ff7a961b74b1953fd05dba7cc568a63b3861ec" +checksum = "c36aecf3899ba8ca698bddee3c7264d6e70afca6620524ece230d2c34b93021b" dependencies = [ - "rustc_version 0.1.7", + "bstr", + "memchr", + "noodles-bgzf", + "noodles-core", ] [[package]] -name = "nom" -version = "7.1.3" +name = "noodles-sam" +version = "0.85.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +checksum = "fbaf538bea4f886de8b3fb611784a13f82c9f8e08e942849e88ec44234674512" dependencies = [ + "bitflags 2.11.0", + "bstr", + "indexmap", + "lexical-core", "memchr", - "minimal-lexical", + "noodles-bgzf", + "noodles-core", + "noodles-csi", ] [[package]] @@ -1146,32 +1064,34 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" [[package]] -name = "openssl-src" -version = "300.5.5+3.5.5" +name = "option-ext" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"3f1787d533e03597a7934fd0a765f0d28e94ecc5fb7789f8053b1e699a56f709" -dependencies = [ - "cc", -] +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" [[package]] -name = "openssl-sys" -version = "0.9.112" +name = "ouroboros" +version = "0.18.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57d55af3b3e226502be1526dfdba67ab0e9c96fc293004e79576b2b9edb0dbdb" +checksum = "1e0f050db9c44b97a94723127e6be766ac5c340c48f2c4bb3ffa11713744be59" dependencies = [ - "cc", - "libc", - "openssl-src", - "pkg-config", - "vcpkg", + "aliasable", + "ouroboros_macro", + "static_assertions", ] [[package]] -name = "option-ext" -version = "0.2.0" +name = "ouroboros_macro" +version = "0.18.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +checksum = "3c7028bdd3d43083f6d8d4d5187680d0d3560d54df4cc9d752005268b41e64d0" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "proc-macro2-diagnostics", + "quote", + "syn", +] [[package]] name = "pathfinder_geometry" @@ -1189,15 +1109,9 @@ version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf9027960355bf3afff9841918474a81a5f972ac6d226d518060bba758b5ad57" dependencies = [ - "rustc_version 0.4.1", + "rustc_version", ] -[[package]] -name = "percent-encoding" -version = "2.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" - [[package]] name = "pkg-config" version = "0.3.32" @@ -1278,15 +1192,6 @@ dependencies = [ "portable-atomic", ] -[[package]] -name = "potential_utf" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" -dependencies = [ - "zerovec", -] - [[package]] name = "ppv-lite86" version = "0.2.21" @@ -1316,10 +1221,17 @@ dependencies = [ ] 
[[package]] -name = "quick-error" -version = "1.2.3" +name = "proc-macro2-diagnostics" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" +checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "version_check", + "yansi", +] [[package]] name = "quote" @@ -1330,12 +1242,6 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "r-efi" -version = "5.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" - [[package]] name = "r-efi" version = "6.0.0" @@ -1397,7 +1303,7 @@ checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" dependencies = [ "getrandom 0.2.17", "libredox", - "thiserror 2.0.18", + "thiserror", ] [[package]] @@ -1429,50 +1335,13 @@ version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" -[[package]] -name = "rust-htslib" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f22161678c3d72e6434c5f3383325dbf88c3cacce665f0c7b4b077fc6e957ba9" -dependencies = [ - "bio-types", - "byteorder", - "custom_derive", - "derive-new 0.7.0", - "hts-sys", - "ieee754", - "lazy_static", - "libc", - "libz-sys", - "linear-map", - "newtype_derive", - "regex", - "thiserror 2.0.18", - "url", -] - -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - -[[package]] -name = "rustc_version" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f5376ea5e30ce23c03eb77cbe4962b988deead10910c372b226388b594c084" -dependencies = [ - "semver 
0.1.20", -] - [[package]] name = "rustc_version" version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" dependencies = [ - "semver 1.0.27", + "semver", ] [[package]] @@ -1491,14 +1360,15 @@ dependencies = [ "indexmap", "indicatif", "log", + "noodles", "number_prefix", + "ouroboros", "plotters", "plotters-backend", "plotters-svg", "rand", "rand_chacha", "rayon", - "rust-htslib", "serde", "serde_json", "serde_yaml_ng", @@ -1525,12 +1395,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "semver" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4f410fedcf71af0345d7607d246e7ad15faaadd49d240ee3b24e5dc21a820ac" - [[package]] name = "semver" version = "1.0.27" @@ -1593,6 +1457,17 @@ dependencies = [ "unsafe-libyaml", ] +[[package]] +name = "sha2" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "446ba717509524cb3f22f17ecc096f10f4822d76ab5c0b9822c5f9c284e825f4" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "shlex" version = "1.3.0" @@ -1606,16 +1481,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" [[package]] -name = "smallvec" -version = "1.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" - -[[package]] -name = "stable_deref_trait" -version = "1.2.1" +name = "static_assertions" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "strsim" @@ -1623,19 +1492,6 @@ version = "0.11.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" -[[package]] -name = "strum_macros" -version = "0.26.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn", -] - [[package]] name = "syn" version = "2.0.117" @@ -1647,44 +1503,13 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "synstructure" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "thiserror" -version = "1.0.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" -dependencies = [ - "thiserror-impl 1.0.69", -] - [[package]] name = "thiserror" version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ - "thiserror-impl 2.0.18", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" -dependencies = [ - "proc-macro2", - "quote", - "syn", + "thiserror-impl", ] [[package]] @@ -1698,22 +1523,18 @@ dependencies = [ "syn", ] -[[package]] -name = "tinystr" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" -dependencies = [ - "displaydoc", - "zerovec", -] - [[package]] name = "ttf-parser" version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"17f77d76d837a7830fe1d4f12b7b4ba4192c1888001c7164257e4bc6d21d96b4" +[[package]] +name = "typenum" +version = "1.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6f5e870be6c3b371b77fe0ee0bafb859fa4964b4404c27de1d380043c4dda20" + [[package]] name = "unicode-ident" version = "1.0.24" @@ -1738,24 +1559,6 @@ version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" -[[package]] -name = "url" -version = "2.5.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" -dependencies = [ - "form_urlencoded", - "idna", - "percent-encoding", - "serde", -] - -[[package]] -name = "utf8_iter" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" - [[package]] name = "utf8parse" version = "0.2.2" @@ -1763,10 +1566,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] -name = "vcpkg" -version = "0.2.15" +name = "version_check" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "walkdir" @@ -1878,7 +1681,7 @@ dependencies = [ "bitflags 2.11.0", "hashbrown 0.15.5", "indexmap", - "semver 1.0.27", + "semver", ] [[package]] @@ -2104,7 +1907,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" dependencies = [ "anyhow", - "heck", + "heck 0.5.0", "wit-parser", ] @@ -2115,7 +1918,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" dependencies = [ "anyhow", - "heck", + "heck 0.5.0", "indexmap", "prettyplease", "syn", @@ -2168,7 +1971,7 @@ dependencies = [ "id-arena", "indexmap", "log", - "semver 1.0.27", + "semver", "serde", "serde_derive", "serde_json", @@ -2177,10 +1980,10 @@ dependencies = [ ] [[package]] -name = "writeable" -version = "0.6.2" +name = "yansi" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" [[package]] name = "yeslogic-fontconfig-sys" @@ -2193,29 +1996,6 @@ dependencies = [ "pkg-config", ] -[[package]] -name = "yoke" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" -dependencies = [ - "stable_deref_trait", - "yoke-derive", - "zerofrom", -] - -[[package]] -name = "yoke-derive" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "synstructure", -] - [[package]] name = "zerocopy" version = "0.8.48" @@ -2237,58 +2017,10 @@ dependencies = [ ] [[package]] -name = "zerofrom" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" -dependencies = [ - "zerofrom-derive", -] - -[[package]] -name = "zerofrom-derive" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "synstructure", -] - -[[package]] -name = "zerotrie" -version = "0.2.3" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" -dependencies = [ - "displaydoc", - "yoke", - "zerofrom", -] - -[[package]] -name = "zerovec" -version = "0.11.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" -dependencies = [ - "yoke", - "zerofrom", - "zerovec-derive", -] - -[[package]] -name = "zerovec-derive" -version = "0.11.2" +name = "zlib-rs" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] +checksum = "3be3d40e40a133f9c916ee3f9f4fa2d9d63435b5fbe1bfc6d9dae0aa0ada1513" [[package]] name = "zmij" diff --git a/Cargo.toml b/Cargo.toml index ebf02f0..776c9b2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "rustqc" version = "0.2.1" edition = "2021" -rust-version = "1.87" +rust-version = "1.89" authors = ["Phil Ewels ", "Jon Manning "] description = "Fast RNA-seq QC in a single pass: dupRadar, featureCounts, 8 RSeQC tools, preseq, samtools stats, and Qualimap — reimplemented in Rust" license = "GPL-3.0-or-later" @@ -25,8 +25,13 @@ path = "src/main.rs" # CLI argument parsing clap = { version = "4", features = ["derive", "env"] } -# BAM file reading -rust-htslib = { version = "1", features = ["static"] } +# BAM file reading (pure Rust, replaces rust-htslib) +noodles = { version = "0.111", features = ["bam", "sam", "cram", "csi", "bgzf", "core", "fasta"] } + +# Self-referential structs: lets the CRAM backends hold a noodles streaming +# iterator alongside the reader it borrows, so CRAM reads stream one container +# at a time instead of buffering every record up front. 
+ouroboros = "0.18" # Plotting plotters = { version = "0.3", features = ["fontconfig-dlopen"] } diff --git a/Dockerfile b/Dockerfile index 7a9f05b..29594c4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,17 +2,13 @@ FROM rust:1-bookworm AS builder RUN apt-get update && apt-get install -y --no-install-recommends \ - cmake \ - zlib1g-dev \ - libbz2-dev \ - liblzma-dev \ - libcurl4-openssl-dev \ - libssl-dev \ + g++ \ libfontconfig1-dev \ pkg-config \ - clang \ && rm -rf /var/lib/apt/lists/* +ENV CXX=g++ + ARG GIT_SHORT_HASH=unknown ARG CPU_TARGET="" diff --git a/docs/src/content/docs/about/contributing.md b/docs/src/content/docs/about/contributing.md index 39346a6..e3efbb8 100644 --- a/docs/src/content/docs/about/contributing.md +++ b/docs/src/content/docs/about/contributing.md @@ -20,7 +20,7 @@ cargo build --release ``` See the [Installation](../getting-started/installation/) page for system -dependency requirements (cmake, zlib, bz2, lzma, curl, ssl, clang). +dependency requirements (C++ compiler and libfontconfig). 
## Running tests diff --git a/docs/src/content/docs/about/credits.mdx b/docs/src/content/docs/about/credits.mdx index 171086e..a5a06c9 100644 --- a/docs/src/content/docs/about/credits.mdx +++ b/docs/src/content/docs/about/credits.mdx @@ -104,7 +104,7 @@ RustQC is built with the following open-source Rust libraries: | Library | Purpose | |---------|---------| -| [rust-htslib](https://github.com/rust-bio/rust-htslib) | SAM/BAM/CRAM file I/O via htslib bindings | +| [noodles](https://crates.io/crates/noodles) | SAM/BAM/CRAM file I/O (pure Rust) | | [plotters](https://github.com/plotters-rs/plotters) | Plot generation (PNG and SVG) | | [clap](https://github.com/clap-rs/clap) | Command-line argument parsing | | [anyhow](https://github.com/dtolnay/anyhow) | Error handling | diff --git a/docs/src/content/docs/getting-started/installation.mdx b/docs/src/content/docs/getting-started/installation.mdx index 1acfd86..e3cc1ee 100644 --- a/docs/src/content/docs/getting-started/installation.mdx +++ b/docs/src/content/docs/getting-started/installation.mdx @@ -91,7 +91,12 @@ docker run -v $(pwd):/data ghcr.io/seqeralabs/rustqc:latest-sve \ ## Build from source -Building from source requires the Rust toolchain, a C++ compiler, and a few system libraries needed by [rust-htslib](https://github.com/rust-bio/rust-htslib) for SAM/BAM/CRAM I/O. The C++ compiler is used both by rust-htslib and by RustQC's preseq FFI shim (a small C++ wrapper that links against the host's `std::mt19937` and `std::binomial_distribution` for exact bootstrap compatibility with upstream preseq). +Building from source requires the Rust toolchain, a C++ compiler, and +libfontconfig (for plot rendering). SAM/BAM/CRAM I/O is handled by the pure-Rust +[noodles](https://crates.io/crates/noodles) crate — no htslib or cmake required. 
+The C++ compiler is used by RustQC's preseq FFI shim (a small C++ wrapper that +links against the host's `std::mt19937` and `std::binomial_distribution` for exact +bootstrap compatibility with upstream preseq). ### Install from crates.io @@ -109,7 +114,7 @@ Select your operating system for build instructions: ```bash # Install system dependencies - brew install cmake zlib bzip2 xz curl openssl + brew install fontconfig # Install Rust (if not already installed) curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh @@ -127,8 +132,7 @@ Select your operating system for build instructions: ```bash # Install system dependencies - sudo apt install cmake zlib1g-dev libbz2-dev liblzma-dev \ - libcurl4-openssl-dev libssl-dev clang + sudo apt install g++ libfontconfig1-dev pkg-config # Install Rust (if not already installed) curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh @@ -144,8 +148,7 @@ Select your operating system for build instructions: ```bash # Install system dependencies - sudo dnf install cmake zlib-devel bzip2-devel xz-devel \ - libcurl-devel openssl-devel clang + sudo dnf install gcc-c++ fontconfig-devel pkg-config # Install Rust (if not already installed) curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh @@ -162,7 +165,7 @@ Select your operating system for build instructions: The compiled binary is at `target/release/rustqc`. You can copy it to a directory on your `PATH` for convenient access. Release builds use link-time optimization (LTO) and symbol stripping for better performance and a smaller binary. -The system dependencies are: cmake, zlib, bz2, lzma, curl, ssl, and a C++ compiler (clang or g++). +The system dependencies are: a C++ compiler (`g++` or `clang++`) and libfontconfig. 
### Extra performance diff --git a/docs/src/content/docs/rna/rseqc.mdx b/docs/src/content/docs/rna/rseqc.mdx index 0cf6434..f63d7ef 100644 --- a/docs/src/content/docs/rna/rseqc.mdx +++ b/docs/src/content/docs/rna/rseqc.mdx @@ -568,8 +568,8 @@ RustQC's TIN output uses the same file format and column names as RSeQC's TIN scores may differ from RSeQC's `tin.py`. Both tools sample identical positions and apply identical flag filters, but they count per-position coverage differently: upstream uses pysam's pileup engine (`bam_plp_auto`) - while RustQC uses CIGAR-derived aligned blocks via the BAM index. The htslib - pileup engine uses a different internal read traversal that can produce + while RustQC uses CIGAR-derived aligned blocks via the BAM index. pysam's pileup + engine uses a different internal read traversal that can produce different coverage counts at certain positions, leading to per-transcript TIN differences. Summary-level statistics (mean, median, stdev) remain closely aligned across all benchmarks. Per-transcript accuracy is an active area of diff --git a/docs/src/content/docs/usage/library.mdx b/docs/src/content/docs/usage/library.mdx index 22215be..ac630da 100644 --- a/docs/src/content/docs/usage/library.mdx +++ b/docs/src/content/docs/usage/library.mdx @@ -19,10 +19,11 @@ Full API reference: **[docs.rs/rustqc](https://docs.rs/rustqc)**. rustqc = "0.2.1" # Or whatever the latest release is ``` -`rust-htslib` is linked statically and a small C++ component (used by the preseq -tool) is built from source, so a working C/C++ toolchain (`cc`, `c++`) is -required when building. No runtime dependencies are added beyond what the binary -already needs. +SAM/BAM/CRAM I/O is handled by the pure-Rust [noodles](https://crates.io/crates/noodles) +crate. A small C++ component (used by the preseq tool) is built from source, so a +working C++ compiler (`g++` or `clang++`) is required when building. No htslib or +cmake dependencies are needed. 
No runtime dependencies are added beyond what the +binary already needs. ## What's in the library diff --git a/src/lib.rs b/src/lib.rs index 9a228ca..a87769c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,9 +12,8 @@ //! rustqc = "0.2" //! ``` //! -//! The library pulls in `rust-htslib` (linked statically), `plotters`, and -//! a small C++ component used by the preseq tool (built via `build.rs`), -//! so a working C/C++ toolchain is required at build time. +//! The library uses [noodles](https://crates.io/crates/noodles) for BAM/SAM/CRAM I/O +//! (pure Rust, no htslib dependency) and a small C++ component used by the preseq tool (built via `build.rs`), so a working C++ compiler is required at build time. //! //! # Modules //! diff --git a/src/main.rs b/src/main.rs index 4c66c17..353c984 100644 --- a/src/main.rs +++ b/src/main.rs @@ -26,8 +26,6 @@ use rustqc::{config, cpu, gtf, rna, summary}; use ui::{Ui, Verbosity}; -use rust_htslib::bam::Read as BamRead; - use rna::rseqc::accumulators::{RseqcAccumulators, RseqcAnnotations, RseqcConfig}; /// Common BAM filename suffixes added by alignment and duplicate-marking tools.
@@ -261,7 +259,7 @@ fn run_rna(args: cli::RnaArgs, ui: &Ui) -> Result<()> { // Validate all input alignment files before expensive GTF parsing for bam_path in &args.input { - let mut reader = rust_htslib::bam::Reader::from_path(bam_path) + let mut reader = crate::rna::bam::Reader::from_path(bam_path) .with_context(|| format!("Cannot open alignment file '{}'", bam_path))?; if let Some(ref reference) = args.reference { reader @@ -1428,7 +1426,7 @@ fn process_single_bam( }); // Extract BAM header info (reference names + lengths) for samtools-compatible outputs let bam_header_refs = { - let reader = rust_htslib::bam::Reader::from_path(bam_path) + let reader = crate::rna::bam::Reader::from_path(bam_path) .with_context(|| format!("Failed to open BAM for header: {}", bam_path))?; let header = reader.header(); (0..header.target_count()) diff --git a/src/rna/bam/align_header.rs b/src/rna/bam/align_header.rs new file mode 100644 index 0000000..cf0194f --- /dev/null +++ b/src/rna/bam/align_header.rs @@ -0,0 +1,66 @@ +//! SAM/BAM header wrapper. + +/// SAM header wrapper with rust-htslib-compatible helpers. +#[derive(Debug, Clone)] +pub struct Header { + pub(crate) inner: noodles::sam::Header, + ref_names: Vec, + ref_lengths: Vec, +} + +impl Header { + /// Wrap a noodles SAM header. + pub fn from_noodles(inner: noodles::sam::Header) -> Self { + let mut ref_names = Vec::new(); + let mut ref_lengths = Vec::new(); + for (name, map) in inner.reference_sequences() { + ref_names.push(String::from_utf8_lossy(name.as_ref()).into_owned()); + let len = u64::try_from(usize::from(map.length())).unwrap_or(0); + ref_lengths.push(len); + } + Self { + inner, + ref_names, + ref_lengths, + } + } + + /// Create an empty header (rust-htslib-compatible constructor). + pub fn new() -> Self { + Self::empty() + } + + /// Create an empty header. + pub fn empty() -> Self { + Self::from_noodles(noodles::sam::Header::default()) + } + + /// Number of reference sequences. 
+ pub fn target_count(&self) -> u32 { + self.ref_names.len() as u32 + } + + /// Reference name for a target ID. + pub fn tid2name(&self, tid: u32) -> &[u8] { + self.ref_names + .get(tid as usize) + .map(|s| s.as_bytes()) + .unwrap_or(b"*") + } + + /// Reference length for a target ID. + pub fn target_len(&self, tid: u32) -> Option { + self.ref_lengths.get(tid as usize).copied() + } + + /// Borrow the underlying noodles header. + pub fn noodles_header(&self) -> &noodles::sam::Header { + &self.inner + } +} + +impl Default for Header { + fn default() -> Self { + Self::empty() + } +} diff --git a/src/rna/bam/align_record.rs b/src/rna/bam/align_record.rs new file mode 100644 index 0000000..89e7b72 --- /dev/null +++ b/src/rna/bam/align_record.rs @@ -0,0 +1,342 @@ +//! BAM record wrapper with a rust-htslib-compatible API backed by noodles. + +use std::io; + +use anyhow::{Context, Result}; +use noodles::bam as noodles_bam; +use noodles::sam::alignment::record::data::field::Tag; +use noodles::sam::alignment::record_buf::data::field::Value; +use noodles::sam::alignment::Record as AlignmentRecord; +use noodles::sam::alignment::RecordBuf; + +use super::cigar::{decode_op, decode_op_from_op, Cigar, CigarStringView}; + +/// SAM/BAM auxiliary tag value (integer variants used by RustQC). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Aux { + U8(u8), + U16(u16), + U32(u32), + I8(i8), + I16(i16), + I32(i32), +} + +/// Sequence view with htslib-compatible base encoding. +pub struct Seq<'a> { + seq_packed: &'a [u8], + seq_len: usize, +} + +impl<'a> Seq<'a> { + pub fn len(&self) -> usize { + self.seq_len + } + + pub fn is_empty(&self) -> bool { + self.seq_len == 0 + } + + /// BAM 4-bit encoded sequence bytes for samtools-compatible CHK checksums. + pub fn encoded_bytes(&self) -> &[u8] { + let nbytes = self.seq_len.div_ceil(2); + &self.seq_packed[..nbytes.min(self.seq_packed.len())] + } + + /// BAM 4-bit encoded base at `i` (A=1, C=2, G=4, T=8, N=15). 
+ pub fn encoded_base(&self, i: usize) -> u8 { + if i >= self.seq_len { + return 15; + } + let byte = self.seq_packed[i / 2]; + if i.is_multiple_of(2) { + byte >> 4 + } else { + byte & 0x0f + } + } + + pub fn as_bytes(&self) -> Vec { + (0..self.seq_len) + .map(|i| b"=ACMGRSVTWYHKDBN"[self.encoded_base(i) as usize]) + .collect() + } +} + +/// A BAM alignment record with rust-htslib-compatible accessors. +#[derive(Debug, Clone)] +pub struct Record { + inner: RecordBuf, + seq_packed: Vec, + qual_cache: Vec, + cigar_cache: Vec, +} + +impl Default for Record { + fn default() -> Self { + Self::new() + } +} + +impl Record { + pub fn new() -> Self { + Self { + inner: RecordBuf::default(), + seq_packed: Vec::new(), + qual_cache: Vec::new(), + cigar_cache: Vec::new(), + } + } + + pub fn set_buf(&mut self, buf: RecordBuf, seq_packed: Vec) { + self.inner = buf; + self.seq_packed = seq_packed; + self.refresh_caches(); + } + + fn refresh_caches(&mut self) { + self.qual_cache = self.inner.quality_scores().as_ref().to_vec(); + self.cigar_cache = self + .inner + .cigar() + .as_ref() + .iter() + .map(|&op| decode_op_from_op(op)) + .collect(); + } + + pub(crate) fn from_bam( + header: &noodles::sam::Header, + bam: &noodles_bam::Record, + ) -> Result { + let mut inner = RecordBuf::default(); + inner + .try_clone_from_alignment_record(header, bam) + .context("failed to convert BAM record")?; + let cigar_cache = bam + .cigar() + .iter() + .map(decode_op) + .collect::, io::Error>>() + .context("failed to decode BAM CIGAR")?; + Ok(Self { + inner, + seq_packed: bam.sequence().as_bytes().to_vec(), + qual_cache: bam.quality_scores().as_ref().to_vec(), + cigar_cache, + }) + } + + pub(crate) fn from_buf(buf: RecordBuf, seq_packed: Option>) -> Self { + let seq_packed = seq_packed.unwrap_or_else(|| pack_ascii_sequence(buf.sequence().as_ref())); + let mut record = Self { + inner: buf, + seq_packed, + qual_cache: Vec::new(), + cigar_cache: Vec::new(), + }; + record.refresh_caches(); + record + } + 
+ pub fn flags(&self) -> u16 { + self.inner.flags().bits() + } + + pub fn tid(&self) -> i32 { + self.inner + .reference_sequence_id() + .map(|id| id as i32) + .unwrap_or(-1) + } + + pub fn pos(&self) -> i64 { + self.inner + .alignment_start() + .map(|p| i64::try_from(usize::from(p) - 1).unwrap_or(-1)) + .unwrap_or(-1) + } + + pub fn mtid(&self) -> i32 { + self.inner + .mate_reference_sequence_id() + .map(|id| id as i32) + .unwrap_or(-1) + } + + pub fn mpos(&self) -> i64 { + self.inner + .mate_alignment_start() + .map(|p| i64::try_from(usize::from(p) - 1).unwrap_or(-1)) + .unwrap_or(-1) + } + + pub fn mapq(&self) -> u8 { + self.inner + .mapping_quality() + .map(|mq| mq.get()) + .unwrap_or(255) + } + + pub fn qname(&self) -> &[u8] { + self.inner.name().map(|n| n.as_ref()).unwrap_or(b"*") + } + + pub fn seq_len(&self) -> i32 { + i32::try_from(self.inner.sequence().len()).unwrap_or(0) + } + + pub fn insert_size(&self) -> i32 { + self.inner.template_length() + } + + pub fn cigar(&self) -> CigarStringView { + CigarStringView::from_ops(self.cigar_cache.clone(), self.pos().max(0) as u64) + } + + pub fn seq(&self) -> Seq<'_> { + Seq { + seq_packed: &self.seq_packed, + seq_len: self.inner.sequence().len(), + } + } + + pub fn qual(&self) -> &[u8] { + &self.qual_cache + } + + pub fn aux(&self, tag: &[u8]) -> Result { + if tag.len() != 2 { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "aux tag must be 2 bytes", + )); + } + let tag = Tag::new(tag[0], tag[1]); + match self.inner.data().get(&tag) { + Some(Value::Int8(v)) => Ok(Aux::I8(*v)), + Some(Value::Int16(v)) => Ok(Aux::I16(*v)), + Some(Value::Int32(v)) => Ok(Aux::I32(*v)), + Some(Value::UInt8(v)) => Ok(Aux::U8(*v)), + Some(Value::UInt16(v)) => Ok(Aux::U16(*v)), + Some(Value::UInt32(v)) => Ok(Aux::U32(*v)), + Some(_) => Err(io::Error::new( + io::ErrorKind::InvalidData, + "non-integer aux tag", + )), + None => Err(io::Error::new(io::ErrorKind::NotFound, "aux tag not found")), + } + } + + pub fn is_paired(&self) 
-> bool { + self.inner.flags().is_segmented() + } + + pub fn is_reverse(&self) -> bool { + self.inner.flags().is_reverse_complemented() + } + + pub fn is_first_in_template(&self) -> bool { + self.inner.flags().is_first_segment() + } + + pub fn is_unmapped(&self) -> bool { + self.inner.flags().is_unmapped() + } + + pub fn is_mate_unmapped(&self) -> bool { + self.inner.flags().is_mate_unmapped() + } + + pub fn is_secondary(&self) -> bool { + self.inner.flags().is_secondary() + } + + pub fn is_supplementary(&self) -> bool { + self.inner.flags().is_supplementary() + } + + pub fn is_quality_check_failed(&self) -> bool { + self.inner.flags().is_qc_fail() + } +} +fn pack_ascii_sequence(seq: &[u8]) -> Vec { + // Full htslib `seq_nt16_table` mapping (the inverse of the `=ACMGRSVTWYHKDBN` + // decode table), so IUPAC ambiguity codes round-trip through SAM/CRAM packing + // and produce samtools-identical CHK checksums. Unknown bytes fall back to N. + const TABLE: [u8; 256] = { + let mut t = [15u8; 256]; + let symbols = b"=ACMGRSVTWYHKDBN"; + let mut code = 0usize; + while code < symbols.len() { + let upper = symbols[code]; + t[upper as usize] = code as u8; + // Map the lowercase variant of each letter to the same code. + if upper.is_ascii_uppercase() { + t[upper.to_ascii_lowercase() as usize] = code as u8; + } + code += 1; + } + t + }; + let mut out = Vec::with_capacity(seq.len().div_ceil(2)); + for chunk in seq.chunks(2) { + let l = TABLE[chunk[0] as usize]; + let r = chunk.get(1).map(|b| TABLE[*b as usize]).unwrap_or(0); + out.push((l << 4) | r); + } + out +} + +pub(crate) fn alignment_to_record( + header: &noodles::sam::Header, + alignment: &dyn AlignmentRecord, +) -> Result { + let mut inner = RecordBuf::default(); + inner + .try_clone_from_alignment_record(header, alignment) + .context("failed to clone alignment record")?; + Ok(Record::from_buf(inner, None)) +} + +/// Export for writer tests that need BAM bytes from a SAM line. 
+pub(crate) fn record_buf_for_writer(record: &Record) -> &RecordBuf { + &record.inner +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn packs_all_iupac_codes_losslessly() { + // The full set of IUPAC symbols, in htslib 4-bit code order, must pack + // and decode back unchanged (the inverse of the `=ACMGRSVTWYHKDBN` table). + let seq = b"=ACMGRSVTWYHKDBN".to_vec(); + let packed = pack_ascii_sequence(&seq); + let view = Seq { + seq_packed: &packed, + seq_len: seq.len(), + }; + assert_eq!(view.as_bytes(), seq); + for code in 0u8..16 { + assert_eq!(view.encoded_base(code as usize), code); + } + } + + #[test] + fn packs_lowercase_like_uppercase() { + assert_eq!( + pack_ascii_sequence(b"acgtnmrn"), + pack_ascii_sequence(b"ACGTNMRN") + ); + } + + #[test] + fn packs_odd_length_trailing_nibble_is_zero() { + // Odd-length sequences leave the final low nibble as 0 (`=`), matching + // the BAM 4-bit packing layout. + let packed = pack_ascii_sequence(b"A"); + assert_eq!(packed, vec![0b0001_0000]); + } +} diff --git a/src/rna/bam/cigar.rs b/src/rna/bam/cigar.rs new file mode 100644 index 0000000..a6edb0f --- /dev/null +++ b/src/rna/bam/cigar.rs @@ -0,0 +1,128 @@ +//! CIGAR types matching the rust-htslib API surface used by RustQC. + +use noodles::sam::alignment::record::cigar::{op::Kind, Op}; + +/// A CIGAR operation, mirroring `rust_htslib::bam::record::Cigar`. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Cigar { + /// Alignment match (`M`). + Match(u32), + /// Insertion (`I`). + Ins(u32), + /// Deletion (`D`). + Del(u32), + /// Skipped region / intron (`N`). + RefSkip(u32), + /// Soft clip (`S`). + SoftClip(u32), + /// Hard clip (`H`). + HardClip(u32), + /// Padding (`P`). + Pad(u32), + /// Sequence match (`=`). + Equal(u32), + /// Sequence mismatch (`X`). + Diff(u32), +} + +impl Cigar { + /// Length of this operation. 
+ pub fn len(&self) -> u32 { + match self { + Self::Match(n) + | Self::Ins(n) + | Self::Del(n) + | Self::RefSkip(n) + | Self::SoftClip(n) + | Self::HardClip(n) + | Self::Pad(n) + | Self::Equal(n) + | Self::Diff(n) => *n, + } + } + + /// Whether this operation has zero length. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +/// Decode a noodles CIGAR operation into RustQC's CIGAR enum. +pub fn decode_op_from_op(op: Op) -> Cigar { + match op.kind() { + Kind::Match => Cigar::Match(op.len() as u32), + Kind::Insertion => Cigar::Ins(op.len() as u32), + Kind::Deletion => Cigar::Del(op.len() as u32), + Kind::Skip => Cigar::RefSkip(op.len() as u32), + Kind::SoftClip => Cigar::SoftClip(op.len() as u32), + Kind::HardClip => Cigar::HardClip(op.len() as u32), + Kind::Pad => Cigar::Pad(op.len() as u32), + Kind::SequenceMatch => Cigar::Equal(op.len() as u32), + Kind::SequenceMismatch => Cigar::Diff(op.len() as u32), + } +} + +/// Decode a fallible noodles CIGAR iterator item. +pub fn decode_op(op: Result) -> Result { + op.map(decode_op_from_op) +} + +/// Compute the exclusive reference end position, in the same coordinate system as `start`, using htslib semantics +/// (M, D, N, =, X consume reference; I, S, H, P do not). +pub fn reference_end(start: u64, ops: &[Cigar]) -> u64 { + let mut end = start; + for op in ops { + match op { + Cigar::Match(len) | Cigar::Equal(len) | Cigar::Diff(len) => { + end += u64::from(*len); + } + Cigar::Del(len) | Cigar::RefSkip(len) => { + end += u64::from(*len); + } + Cigar::Ins(_) | Cigar::SoftClip(_) | Cigar::HardClip(_) | Cigar::Pad(_) => {} + } + } + end +} + +/// Owned CIGAR string (used in unit tests). +#[derive(Debug, Clone, Default)] +pub struct CigarString(pub Vec); + +/// View over a decoded CIGAR with htslib-compatible helpers. +#[derive(Debug, Clone)] +pub struct CigarStringView { + ops: Vec, + start: u64, +} + +impl CigarStringView { + /// Build a view from decoded operations and a 0-based reference start.
+ pub(crate) fn from_ops(ops: Vec, start: u64) -> Self { + Self { ops, start } + } + + /// Build a view from an owned CIGAR string and reference start (0-based). + pub fn new(cigar: CigarString, start: u64) -> Self { + Self { + ops: cigar.0, + start, + } + } + + /// Iterate CIGAR operations. + pub fn iter(&self) -> impl Iterator { + self.ops.iter() + } + + /// Reference end position (htslib `end_pos` semantics). + pub fn end_pos(&self) -> u64 { + reference_end(self.start, &self.ops) + } + + /// Slice view of decoded operations (rust-htslib `CigarStringView::as_ref`). + #[allow(clippy::wrong_self_convention, clippy::should_implement_trait)] + pub fn as_ref(&self) -> &[Cigar] { + &self.ops + } +} diff --git a/src/rna/bam/io.rs b/src/rna/bam/io.rs new file mode 100644 index 0000000..6ebc23a --- /dev/null +++ b/src/rna/bam/io.rs @@ -0,0 +1,696 @@ +//! BAM/SAM/CRAM readers with a rust-htslib-compatible API. + +use std::fs::File; +use std::io::BufRead; +use std::path::{Path, PathBuf}; + +use super::align_header::Header; +use super::align_record::{alignment_to_record, Record}; +use anyhow::{Context, Result}; +use noodles::bam as noodles_bam; +use noodles::bgzf; +use noodles::bgzf::io::Seek as _; +use noodles::core::region::Interval; +use noodles::core::Region; +use noodles::cram as noodles_cram; +use noodles::csi::binning_index::index::reference_sequence::bin::Chunk; +use noodles::fasta as noodles_fasta; +use noodles::sam as noodles_sam; + +fn store_record(record: &mut Record, loaded: Record) -> Result<()> { + *record = loaded; + Ok(()) +} + +/// Fetch target for indexed readers. +#[derive(Debug, Clone, Copy)] +pub enum FetchDefinition { + /// Unmapped reads stored after mapped reads in the BAM file. + Unmapped, +} + +/// Fetch target accepted by [`IndexedReader::fetch`]. +#[derive(Debug, Clone, Copy)] +pub enum FetchTarget { + /// Reference sequence ID. + Tid(u32), + /// Unmapped reads bucket. 
+ Unmapped, +} + +impl From for FetchTarget { + fn from(value: u32) -> Self { + Self::Tid(value) + } +} + +impl From for FetchTarget { + fn from(value: FetchDefinition) -> Self { + match value { + FetchDefinition::Unmapped => Self::Unmapped, + } + } +} + +/// Trait mirroring `rust_htslib::bam::Read`. +pub trait Read { + /// Read the next record into `record`. + fn read(&mut self, record: &mut Record) -> Option>; +} + +fn load_fasta_repository(path: &Path) -> Result { + let mut reader = noodles_fasta::io::reader::Builder + .build_from_path(path) + .with_context(|| format!("failed to open reference FASTA {}", path.display()))?; + let mut records = Vec::new(); + for result in reader.records() { + records.push(result.context("failed to read reference FASTA record")?); + } + Ok(noodles_fasta::Repository::new(records)) +} + +fn open_bam_reader(path: &Path) -> Result>> { + let file = + File::open(path).with_context(|| format!("failed to open BAM file {}", path.display()))?; + Ok(noodles_bam::io::Reader::new(file)) +} + +/// Owned iterator over decoded CRAM records. Boxed so the `query` and +/// `query_unmapped` iterators (which have distinct concrete types) can share one +/// field on the indexed cursor. +type CramRecordIter<'a> = + Box> + 'a>; + +/// Self-referential holder for a sequential CRAM reader plus its streaming record +/// iterator. +/// +/// noodles' `Records` iterator borrows the reader and header, so it cannot sit +/// next to them in a plain struct. ouroboros makes that borrow sound, letting us +/// stream one container at a time instead of collecting every record up front. +#[ouroboros::self_referencing] +struct CramReaderCursor { + reader: noodles_cram::io::Reader, + header: noodles_sam::Header, + #[borrows(mut reader, header)] + #[not_covariant] + iter: noodles_cram::io::reader::Records<'this, 'this, File>, +} + +/// Self-referential holder for an indexed CRAM reader plus the iterator from its +/// most recent `fetch`. 
Rebuilt on each fetch via `into_heads`; before the first +/// fetch the iterator is empty. +#[ouroboros::self_referencing] +struct CramIndexedCursor { + reader: noodles_cram::io::IndexedReader, + header: noodles_sam::Header, + #[borrows(mut reader, header)] + #[not_covariant] + iter: CramRecordIter<'this>, +} + +enum ReaderBackend { + Bam { + reader: noodles_bam::io::Reader>, + header: Header, + scratch: noodles_bam::Record, + }, + Sam { + reader: noodles_sam::io::Reader>, + header: Header, + scratch: noodles_sam::Record, + }, + Cram { + cursor: CramReaderCursor, + header: Header, + }, +} + +/// Sequential alignment file reader (BAM/SAM/CRAM). +pub struct Reader { + backend: ReaderBackend, + source_path: PathBuf, + cram_reference: Option, + decompression_threads: usize, +} + +impl Reader { + /// Open an alignment file, detecting the format from the extension. + pub fn from_path>(path: P) -> Result { + let path = path.as_ref().to_path_buf(); + let path_str = path.to_string_lossy(); + let backend = if path_str.ends_with(".sam") { + Self::open_sam(&path)? + } else if path_str.ends_with(".cram") { + Self::open_cram(&path, None)? + } else { + Self::open_bam(&path)? 
+ }; + Ok(Self { + backend, + source_path: path, + cram_reference: None, + decompression_threads: 0, + }) + } + + fn open_bam(path: &Path) -> Result { + let mut reader = open_bam_reader(path)?; + let header = + Header::from_noodles(reader.read_header().context("failed to read BAM header")?); + Ok(ReaderBackend::Bam { + reader, + header, + scratch: noodles_bam::Record::default(), + }) + } + + fn open_sam(path: &Path) -> Result { + let mut reader = noodles_sam::io::reader::Builder::default() + .build_from_path(path) + .with_context(|| format!("failed to open SAM file {}", path.display()))?; + let header = + Header::from_noodles(reader.read_header().context("failed to read SAM header")?); + Ok(ReaderBackend::Sam { + reader, + header, + scratch: noodles_sam::Record::default(), + }) + } + + fn open_cram(path: &Path, reference: Option<&Path>) -> Result { + let repository = if let Some(fasta_path) = reference { + load_fasta_repository(fasta_path)? + } else { + noodles_fasta::Repository::default() + }; + let file = File::open(path) + .with_context(|| format!("failed to open CRAM file {}", path.display()))?; + let mut reader = noodles_cram::io::reader::Builder::default() + .set_reference_sequence_repository(repository) + .build_from_reader(file); + let sam_header = reader.read_header().context("failed to read CRAM header")?; + let header = Header::from_noodles(sam_header.clone()); + let cursor = CramReaderCursorBuilder { + reader, + header: sam_header, + iter_builder: |reader, header| reader.records(header), + } + .build(); + Ok(ReaderBackend::Cram { cursor, header }) + } + + /// Set the reference FASTA for CRAM decoding. + pub fn set_reference>(&mut self, path: P) -> Result<()> { + let path = path.as_ref().to_path_buf(); + self.cram_reference = Some(path.clone()); + if matches!(self.backend, ReaderBackend::Cram { .. }) { + self.backend = Self::open_cram(&self.source_path, Some(&path))?; + } + Ok(()) + } + + /// Enable BGZF decompression threads (BAM only). 
+ /// + /// Multithreaded BGZF decompression is not yet wired through noodles; this + /// stores the requested thread count for API compatibility. + pub fn set_threads(&mut self, threads: usize) -> Result<()> { + self.decompression_threads = threads; + Ok(()) + } + + /// Return the SAM/BAM header. + pub fn header(&self) -> &Header { + match &self.backend { + ReaderBackend::Bam { header, .. } + | ReaderBackend::Sam { header, .. } + | ReaderBackend::Cram { header, .. } => header, + } + } +} + +impl Read for Reader { + fn read(&mut self, record: &mut Record) -> Option> { + match &mut self.backend { + ReaderBackend::Bam { + reader, + header, + scratch, + } => match reader.read_record(scratch) { + Ok(0) => None, + Ok(_) => Some( + Record::from_bam(header.noodles_header(), scratch) + .and_then(|loaded| store_record(record, loaded)), + ), + Err(e) => Some(Err(e.into())), + }, + ReaderBackend::Sam { + reader, + header, + scratch, + } => match reader.read_record(scratch) { + Ok(0) => None, + Ok(_) => Some( + alignment_to_record(header.noodles_header(), scratch) + .and_then(|loaded| store_record(record, loaded)), + ), + Err(e) => Some(Err(e.into())), + }, + ReaderBackend::Cram { cursor, .. } => match cursor.with_iter_mut(|iter| iter.next()) { + Some(Ok(buf)) => Some(store_record(record, Record::from_buf(buf, None))), + Some(Err(e)) => Some(Err(e.into())), + None => None, + }, + } + } +} + +/// Lazy streaming cursor for indexed BAM reads. +/// +/// Mirrors the chunk state machine in `noodles_csi::io::Query` so records are +/// decoded one at a time directly from the BGZF stream, instead of buffering an +/// entire reference sequence (or all unmapped reads) in memory. This matters +/// because dupRadar fetches one chromosome per worker thread concurrently, so +/// the old buffering cost scaled as reference size times thread count. +enum BamCursor { + /// No active fetch, or the current fetch has been exhausted. 
+ Done, + /// Streaming the mapped records of a single reference via its index chunks. + Region { + chunks: std::vec::IntoIter, + /// End virtual position of the chunk currently being read; `None` means + /// the next chunk still needs to be seeked to. + chunk_end: Option, + reference_sequence_id: usize, + }, + /// Streaming unmapped records from a seeked position to end of file. + Unmapped, +} + +enum IndexedBackend { + Bam { + reader: noodles_bam::io::IndexedReader>, + header: Header, + scratch: noodles_bam::Record, + cursor: BamCursor, + }, + Cram { + /// `None` only transiently while a fetch rebuilds the cursor. + cursor: Option, + header: Header, + }, +} + +/// Indexed alignment file reader (BAM/CRAM with .bai/.csi/.crai). +pub struct IndexedReader { + backend: IndexedBackend, + source_path: PathBuf, + cram_reference: Option, +} + +impl IndexedReader { + /// Open an indexed alignment file. + pub fn from_path>(path: P) -> Result { + let path = path.as_ref().to_path_buf(); + let backend = if path.to_string_lossy().ends_with(".cram") { + let mut reader = noodles_cram::io::indexed_reader::Builder::default() + .build_from_path(&path) + .with_context(|| format!("failed to open indexed CRAM {}", path.display()))?; + let sam_header = reader + .read_header() + .context("failed to read indexed CRAM header")?; + let header = Header::from_noodles(sam_header.clone()); + let cursor = CramIndexedCursorBuilder { + reader, + header: sam_header, + iter_builder: |_reader, _header| Box::new(std::iter::empty()) as CramRecordIter, + } + .build(); + IndexedBackend::Cram { + cursor: Some(cursor), + header, + } + } else { + let mut reader = noodles_bam::io::indexed_reader::Builder::default() + .build_from_path(&path) + .with_context(|| format!("failed to open indexed BAM {}", path.display()))?; + let header = Header::from_noodles( + reader + .read_header() + .context("failed to read indexed BAM header")?, + ); + IndexedBackend::Bam { + reader, + header, + scratch: 
noodles_bam::Record::default(), + cursor: BamCursor::Done, + } + }; + Ok(Self { + backend, + source_path: path, + cram_reference: None, + }) + } + + /// Set the reference FASTA for CRAM decoding. + pub fn set_reference>(&mut self, path: P) -> Result<()> { + let path = path.as_ref().to_path_buf(); + self.cram_reference = Some(path.clone()); + let cram_path = self.source_path.clone(); + if let IndexedBackend::Cram { cursor, header } = &mut self.backend { + let repository = load_fasta_repository(&path)?; + let mut reader = noodles_cram::io::indexed_reader::Builder::default() + .set_reference_sequence_repository(repository) + .build_from_path(&cram_path)?; + let sam_header = reader.read_header()?; + *header = Header::from_noodles(sam_header.clone()); + *cursor = Some( + CramIndexedCursorBuilder { + reader, + header: sam_header, + iter_builder: |_reader, _header| Box::new(std::iter::empty()) as CramRecordIter, + } + .build(), + ); + } + Ok(()) + } + + /// Enable BGZF decompression threads (BAM only). + /// + /// Indexed fetching dominates runtime; this is a no-op for indexed readers. + pub fn set_threads(&mut self, _threads: usize) -> Result<()> { + Ok(()) + } + + /// Seek to a reference ID or the unmapped read bucket. + pub fn fetch>(&mut self, target: T) -> Result<()> { + match target.into() { + FetchTarget::Tid(tid) => self.fetch_tid(tid), + FetchTarget::Unmapped => self.fetch_unmapped(), + } + } + + fn fetch_tid(&mut self, tid: u32) -> Result<()> { + match &mut self.backend { + IndexedBackend::Bam { reader, cursor, .. } => { + // Resolve the chunks covering the whole reference straight from + // the index and stream them lazily, rather than buffering every + // record on the reference in memory. 
+ let chunks = reader + .index() + .query(tid as usize, Interval::from(..)) + .with_context(|| format!("failed to query index for tid {tid}"))?; + *cursor = BamCursor::Region { + chunks: chunks.into_iter(), + chunk_end: None, + reference_sequence_id: tid as usize, + }; + Ok(()) + } + IndexedBackend::Cram { cursor, header } => { + // Rebuild the cursor with a fresh region query. noodles' query + // iterator borrows the reader, so we recover the owned reader via + // `into_heads` and stream container-by-container from there. + let region = region_for_tid(header, tid); + let heads = cursor.take().expect("CRAM cursor present").into_heads(); + *cursor = Some( + CramIndexedCursorTryBuilder { + reader: heads.reader, + header: heads.header, + iter_builder: |reader, header| -> Result { + let query = reader + .query(header, ®ion) + .with_context(|| format!("failed to query CRAM tid {tid}"))?; + Ok(Box::new(query)) + }, + } + .try_build()?, + ); + Ok(()) + } + } + } + + fn fetch_unmapped(&mut self) -> Result<()> { + let source_path = self.source_path.clone(); + match &mut self.backend { + IndexedBackend::Bam { reader, cursor, .. } => { + // Seek to the start of the unmapped read region and stream from + // there, filtering for unmapped records as we go. + match reader.index().last_first_record_start_position() { + Some(pos) => { + reader + .get_mut() + .seek_to_virtual_position(pos) + .context("failed to seek to unmapped BAM reads")?; + } + None => { + // The index carries no metadata pseudo-bin, so there is + // no recorded unmapped offset. Reopen and stream from the + // first record, relying on the per-record unmapped filter. 
+ let mut fresh = noodles_bam::io::indexed_reader::Builder::default() + .build_from_path(&source_path) + .with_context(|| { + format!("failed to reopen indexed BAM {}", source_path.display()) + })?; + fresh + .read_header() + .context("failed to read indexed BAM header")?; + *reader = fresh; + } + } + *cursor = BamCursor::Unmapped; + Ok(()) + } + IndexedBackend::Cram { cursor, .. } => { + let heads = cursor.take().expect("CRAM cursor present").into_heads(); + *cursor = Some( + CramIndexedCursorTryBuilder { + reader: heads.reader, + header: heads.header, + iter_builder: |reader, header| -> Result { + let query = reader + .query_unmapped(header) + .context("failed to query unmapped CRAM reads")?; + Ok(Box::new(query)) + }, + } + .try_build()?, + ); + Ok(()) + } + } + } +} + +impl Read for IndexedReader { + fn read(&mut self, record: &mut Record) -> Option> { + match &mut self.backend { + IndexedBackend::Bam { + reader, + header, + scratch, + cursor, + } => loop { + // Take ownership of the cursor each iteration so `reader` can be + // mutated (seek/read) and the new state written back without + // fighting the borrow checker. Early returns leave the cursor as + // `Done`, which is the correct exhausted state. + match std::mem::replace(cursor, BamCursor::Done) { + BamCursor::Done => return None, + BamCursor::Unmapped => match reader.read_record(scratch) { + Ok(0) => return None, + Ok(_) => { + *cursor = BamCursor::Unmapped; + if scratch.flags().is_unmapped() { + return Some( + Record::from_bam(header.noodles_header(), scratch) + .and_then(|loaded| store_record(record, loaded)), + ); + } + // Mapped record in the unmapped tail: skip and continue. 
+ } + Err(e) => return Some(Err(e.into())), + }, + BamCursor::Region { + mut chunks, + chunk_end, + reference_sequence_id, + } => match chunk_end { + None => match chunks.next() { + Some(chunk) => { + if let Err(e) = + reader.get_mut().seek_to_virtual_position(chunk.start()) + { + return Some(Err(anyhow::Error::new(e) + .context("failed to seek to BAM index chunk"))); + } + *cursor = BamCursor::Region { + chunks, + chunk_end: Some(chunk.end()), + reference_sequence_id, + }; + } + None => return None, + }, + Some(end) => { + if reader.get_mut().virtual_position() >= end { + // Reached the end of this chunk; advance to the next. + *cursor = BamCursor::Region { + chunks, + chunk_end: None, + reference_sequence_id, + }; + continue; + } + match reader.read_record(scratch) { + Ok(0) => { + *cursor = BamCursor::Region { + chunks, + chunk_end: None, + reference_sequence_id, + }; + } + Ok(_) => { + *cursor = BamCursor::Region { + chunks, + chunk_end: Some(end), + reference_sequence_id, + }; + // A chunk may begin with a few records from a + // neighbouring reference; keep only ours. + let on_reference = match scratch.reference_sequence_id() { + Some(Ok(id)) => id == reference_sequence_id, + Some(Err(e)) => return Some(Err(e.into())), + None => false, + }; + if on_reference { + return Some( + Record::from_bam(header.noodles_header(), scratch) + .and_then(|loaded| store_record(record, loaded)), + ); + } + } + Err(e) => return Some(Err(e.into())), + } + } + }, + } + }, + IndexedBackend::Cram { cursor, .. } => { + let cursor = cursor.as_mut()?; + match cursor.with_iter_mut(|iter| iter.next()) { + Some(Ok(buf)) => Some(store_record(record, Record::from_buf(buf, None))), + Some(Err(e)) => Some(Err(e.into())), + None => None, + } + } + } + } +} + +fn region_for_tid(header: &Header, tid: u32) -> Region { + // Query the whole reference sequence by name. 
Building the `Region` directly + // (rather than formatting and re-parsing `name:start-end`) avoids mangling + // reference names that contain `:` or `-` (e.g. HLA contigs) and preserves + // non-UTF-8 names byte-for-byte. + Region::new(header.tid2name(tid).to_vec(), ..) +} + +#[cfg(test)] +mod tests { + use std::num::NonZero; + use std::sync::atomic::{AtomicU64, Ordering}; + + use noodles::core::Position; + use noodles::fasta; + use noodles::sam::alignment::io::Write as _; + use noodles::sam::alignment::record::cigar::{op::Kind, Op}; + use noodles::sam::alignment::record::Flags; + use noodles::sam::alignment::RecordBuf; + use noodles::sam::header::record::value::map::ReferenceSequence; + use noodles::sam::header::record::value::Map; + use noodles::{cram, sam}; + + use super::{Read, Reader}; + use crate::rna::bam::Record; + + static COUNTER: AtomicU64 = AtomicU64::new(0); + + fn temp_path(suffix: &str) -> std::path::PathBuf { + let id = COUNTER.fetch_add(1, Ordering::Relaxed); + std::env::temp_dir().join(format!( + "rustqc_cram_test_{}_{id}_{suffix}", + std::process::id() + )) + } + + // Writes a tiny reference-backed CRAM file and streams it back through the + // ouroboros-backed sequential `Reader`, verifying the records come through in + // order rather than being buffered or dropped. 
+ #[test] + fn streams_cram_records_sequentially() { + let reference = b"ACGTACGTACGT".to_vec(); + let cram_path = temp_path("seq.cram"); + let fasta_path = temp_path("ref.fa"); + + std::fs::write(&fasta_path, b">ref0\nACGTACGTACGT\n").unwrap(); + + let repository = fasta::Repository::new(vec![fasta::Record::new( + fasta::record::Definition::new("ref0", None), + fasta::record::Sequence::from(reference.clone()), + )]); + + let header = sam::Header::builder() + .add_reference_sequence( + "ref0", + Map::::new(NonZero::new(reference.len()).unwrap()), + ) + .build(); + + let expected = [(b"r1".to_vec(), 0i64), (b"r2".to_vec(), 4i64)]; + let records: Vec = expected + .iter() + .map(|(name, start_zero)| { + let start = Position::new(*start_zero as usize + 1).unwrap(); + RecordBuf::builder() + .set_name(&name[..]) + .set_flags(Flags::empty()) + .set_reference_sequence_id(0) + .set_alignment_start(start) + .set_cigar([Op::new(Kind::Match, 4)].into_iter().collect()) + .set_sequence(b"ACGT".to_vec().into()) + .set_quality_scores(vec![30u8; 4].into()) + .build() + }) + .collect(); + + { + let mut writer = cram::io::writer::Builder::default() + .set_reference_sequence_repository(repository) + .build_from_path(&cram_path) + .unwrap(); + writer.write_header(&header).unwrap(); + for record in &records { + writer.write_alignment_record(&header, record).unwrap(); + } + writer.try_finish(&header).unwrap(); + } + + let mut reader = Reader::from_path(&cram_path).unwrap(); + reader.set_reference(&fasta_path).unwrap(); + + let mut record = Record::new(); + let mut got = Vec::new(); + while let Some(result) = reader.read(&mut record) { + result.unwrap(); + got.push((record.qname().to_vec(), record.pos())); + } + + let _ = std::fs::remove_file(&cram_path); + let _ = std::fs::remove_file(&fasta_path); + + assert_eq!(got, expected.to_vec()); + } +} diff --git a/src/rna/bam/mod.rs b/src/rna/bam/mod.rs new file mode 100644 index 0000000..9fa7252 --- /dev/null +++ b/src/rna/bam/mod.rs @@ -0,0 
+1,38 @@ +//! BAM/SAM/CRAM compatibility layer backed by [noodles](https://crates.io/crates/noodles). +//! +//! This module exposes a rust-htslib-shaped API so the rest of RustQC can read +//! alignment files without linking to htslib, while preserving samtools-identical +//! statistics output. + +mod align_header; +mod align_record; +mod cigar; +mod io; +mod writer; + +pub use align_header::Header; +pub use align_record::{Aux, Record, Seq}; +pub use io::{FetchDefinition, IndexedReader, Reader}; + +/// Trait mirroring `rust_htslib::bam::Read`. +pub use io::Read; + +/// Record sub-module mirroring `rust_htslib::bam::record`. +pub mod record { + pub use super::align_record::{Aux, Record, Seq}; + pub use super::cigar::{Cigar, CigarString, CigarStringView}; +} + +/// Header sub-module mirroring `rust_htslib::bam::header`. +pub mod header { + pub use super::align_header::Header; + pub use super::writer::{HeaderRecord, HeaderView}; +} + +/// BAM index helpers used by integration tests. +pub mod index { + pub use super::writer::index::*; +} + +/// BAM writer helpers used by integration tests. +pub use writer::{Format, Writer}; diff --git a/src/rna/bam/writer.rs b/src/rna/bam/writer.rs new file mode 100644 index 0000000..97088ad --- /dev/null +++ b/src/rna/bam/writer.rs @@ -0,0 +1,190 @@ +//! Test helpers for writing BAM files (integration tests). 
+ +use std::fs::File; +use std::io; +use std::num::NonZero; +use std::path::{Path, PathBuf}; + +use noodles::bam as noodles_bam; +use noodles::sam as noodles_sam; +use noodles::sam::header::record::value::map::header::{tag, Version}; +use noodles::sam::header::record::value::map::{self, ReferenceSequence}; +use noodles::sam::header::record::value::Map; + +use noodles::sam::alignment::io::Write as AlignmentWrite; + +use super::align_header::Header; +use super::align_record::{record_buf_for_writer, Record}; + +fn parse_sam_version(version: &str) -> Version { + let mut parts = version.split('.'); + let major = parts.next().and_then(|s| s.parse().ok()).unwrap_or(1); + let minor = parts.next().and_then(|s| s.parse().ok()).unwrap_or(6); + Version::new(major, minor) +} + +/// BAM output format. +#[derive(Debug, Clone, Copy)] +pub enum Format { + /// Binary BAM. + Bam, +} + +/// SAM header builder record (rust-htslib-compatible). +#[derive(Debug, Clone)] +pub struct HeaderRecord { + tag: [u8; 2], + fields: Vec<(Vec, String)>, +} + +impl HeaderRecord { + /// Create a new header line builder (`@HD`, `@SQ`, etc.). + pub fn new(tag: &[u8; 2]) -> Self { + Self { + tag: *tag, + fields: Vec::new(), + } + } + + /// Add a `TAG:value` field and return self for chaining. + pub fn push_tag(mut self, tag: &[u8], value: impl Into) -> Self { + self.fields.push((tag.to_vec(), value.into())); + self + } +} + +/// Header view used when parsing SAM lines into records. +#[derive(Debug, Clone)] +pub struct HeaderView { + header: noodles_sam::Header, +} + +impl HeaderView { + /// Build a header view from a RustQC header wrapper. + pub fn from_header(header: &Header) -> Self { + Self { + header: header.noodles_header().clone(), + } + } +} + +/// BAM writer for test fixtures. +pub struct Writer { + inner: noodles_bam::io::Writer>, + header: noodles_sam::Header, +} + +impl Writer { + /// Create a BAM writer at `path`. 
+ pub fn from_path(path: &Path, header: &Header, _format: Format) -> io::Result { + let file = File::create(path)?; + let mut inner = noodles_bam::io::Writer::new(file); + inner.write_header(header.noodles_header())?; + Ok(Self { + inner, + header: header.noodles_header().clone(), + }) + } + + /// Write a BAM record. + pub fn write(&mut self, record: &Record) -> io::Result<()> { + self.inner + .write_alignment_record(&self.header, record_buf_for_writer(record)) + } +} + +impl Header { + /// Append a `@`-prefixed header record. + pub fn push_record(&mut self, record: HeaderRecord) { + let mut inner = self.inner.clone(); + match &record.tag { + b"HD" => { + let mut version = "1.6".to_string(); + let mut sort_order = None; + for (tag, value) in &record.fields { + match &tag[..] { + b"VN" => version = value.clone(), + b"SO" => sort_order = Some(value.clone()), + _ => {} + } + } + let version = parse_sam_version(&version); + let mut hd_builder = Map::::builder().set_version(version); + if let Some(so) = sort_order { + hd_builder = hd_builder.insert(tag::SORT_ORDER, so); + } + let hd = hd_builder.build().expect("valid SAM header map"); + inner = noodles_sam::Header::builder() + .set_header(hd) + .set_reference_sequences(inner.reference_sequences().clone()) + .build(); + } + b"SQ" => { + let mut name = String::new(); + let mut len = 0u64; + for (tag, value) in &record.fields { + match &tag[..] 
{ + b"SN" => name = value.clone(), + b"LN" => len = value.parse().unwrap_or(0), + _ => {} + } + } + if !name.is_empty() { + let mut builder = noodles_sam::Header::builder(); + if let Some(hd) = inner.header().cloned() { + builder = builder.set_header(hd); + } + for (existing_name, map) in inner.reference_sequences() { + builder = builder.add_reference_sequence( + String::from_utf8_lossy(existing_name.as_ref()).as_ref(), + map.clone(), + ); + } + builder = builder.add_reference_sequence( + name.as_str(), + Map::::new(NonZero::new(len.max(1) as usize).unwrap()), + ); + inner = builder.build(); + } + } + _ => {} + } + *self = Header::from_noodles(inner); + } +} + +impl Record { + /// Parse a SAM line into a BAM record. + pub fn from_sam(header: &HeaderView, line: &[u8]) -> io::Result { + let sam_record = noodles_sam::Record::try_from(line)?; + super::align_record::alignment_to_record(&header.header, &sam_record) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)) + } +} + +/// BAM index construction (test fixtures). +pub mod index { + use super::*; + + /// Index type (BAI only in tests). + #[derive(Debug, Clone, Copy)] + pub enum Type { + /// BAI index. + Bai, + } + + /// Build a BAI index beside a coordinate-sorted BAM file. + pub fn build( + bam_path: &Path, + index_path: Option<&Path>, + _kind: Type, + _min_shift: u32, + ) -> io::Result<()> { + let index = noodles_bam::fs::index(bam_path)?; + let index_path = index_path.map_or_else( + || PathBuf::from(format!("{}.bai", bam_path.display())), + |p| p.to_path_buf(), + ); + noodles_bam::bai::fs::write(&index_path, &index) + } +} diff --git a/src/rna/bam_flags.rs b/src/rna/bam_flags.rs index 949601a..050ab4c 100644 --- a/src/rna/bam_flags.rs +++ b/src/rna/bam_flags.rs @@ -2,6 +2,9 @@ //! //! Centralised definitions so every module uses the same constants. +use crate::rna::bam::record::Aux; +use crate::rna::bam::Record; + /// Read is paired in sequencing (0x1). 
pub const BAM_FPAIRED: u16 = 0x1; /// Read is mapped in a proper pair (0x2). @@ -36,8 +39,7 @@ pub const BAM_FSUPPLEMENTARY: u16 = 0x800; /// Handles all integer Aux variants (U8, U16, U32, I8, I16, I32) /// and returns the value as `i64`. Returns `None` if the tag is /// absent or has a non-integer type. -pub fn get_aux_int(record: &rust_htslib::bam::Record, tag: &[u8]) -> Option { - use rust_htslib::bam::record::Aux; +pub fn get_aux_int(record: &Record, tag: &[u8]) -> Option { match record.aux(tag) { Ok(Aux::U8(v)) => Some(v as i64), Ok(Aux::U16(v)) => Some(v as i64), diff --git a/src/rna/dupradar/counting.rs b/src/rna/dupradar/counting.rs index 785115c..18c28fe 100644 --- a/src/rna/dupradar/counting.rs +++ b/src/rna/dupradar/counting.rs @@ -11,6 +11,7 @@ use crate::gtf::Gene; use crate::io::format_count; +use crate::rna::bam::{self, FetchDefinition, Read as BamRead}; use crate::rna::qualimap::QualimapAccum; use crate::rna::rseqc::accumulators::{RseqcAccumulators, RseqcAnnotations, RseqcConfig}; use crate::Strandedness; @@ -20,7 +21,6 @@ use indexmap::IndexMap; use indicatif::ProgressBar; use log::{debug, warn}; use rayon::prelude::*; -use rust_htslib::bam::{self, FetchDefinition, Read as BamRead}; use std::collections::HashMap; use std::sync::atomic::{AtomicU64, Ordering}; @@ -418,10 +418,10 @@ fn strand_matches( /// * `blocks` - Reusable buffer for aligned blocks (cleared before use) fn cigar_to_aligned_blocks( start: u64, - cigar: &rust_htslib::bam::record::CigarStringView, + cigar: &crate::rna::bam::record::CigarStringView, blocks: &mut Vec<(u64, u64)>, ) { - use rust_htslib::bam::record::Cigar; + use crate::rna::bam::record::Cigar; blocks.clear(); let mut ref_pos = start; diff --git a/src/rna/mod.rs b/src/rna/mod.rs index 7dbcc09..3ed5454 100644 --- a/src/rna/mod.rs +++ b/src/rna/mod.rs @@ -3,6 +3,7 @@ //! Contains dupRadar duplication rate analysis, featureCounts-compatible output, //! and RSeQC tool reimplementations. 
+pub mod bam; pub mod bam_flags; pub mod cpp_rng; pub mod dupradar; diff --git a/src/rna/preseq.rs b/src/rna/preseq.rs index 592664b..98debb8 100644 --- a/src/rna/preseq.rs +++ b/src/rna/preseq.rs @@ -4,9 +4,9 @@ //! sequencing depth using the Good-Toulmin rational function extrapolation //! method, matching the behavior of preseq v3. +use crate::rna::bam; use anyhow::{bail, Context, Result}; use log::debug; -use rust_htslib::bam; use std::collections::HashMap; use std::io::Write; use std::path::Path; @@ -154,7 +154,7 @@ impl PreseqAccum { let tid = record.tid(); let start = record.pos(); - let end = record.cigar().end_pos(); + let end = record.cigar().end_pos() as i64; let info = MateInfo { tid, start, end }; diff --git a/src/rna/qualimap/accumulator.rs b/src/rna/qualimap/accumulator.rs index f8e82fd..6b35808 100644 --- a/src/rna/qualimap/accumulator.rs +++ b/src/rna/qualimap/accumulator.rs @@ -9,9 +9,9 @@ use std::collections::{HashMap, HashSet}; +use crate::rna::bam; +use crate::rna::bam::record::Cigar; use coitrees::IntervalTree; -use rust_htslib::bam; -use rust_htslib::bam::record::Cigar; use crate::Strandedness; diff --git a/src/rna/rseqc/accumulators.rs b/src/rna/rseqc/accumulators.rs index b91a409..a2ad5a1 100644 --- a/src/rna/rseqc/accumulators.rs +++ b/src/rna/rseqc/accumulators.rs @@ -7,9 +7,9 @@ use std::collections::{BTreeMap, HashMap, HashSet}; use std::hash::{Hash, Hasher}; +use crate::rna::bam; use anyhow::Result; use indexmap::IndexMap; -use rust_htslib::bam; use super::bam_stat::{BamStatResult, GcDepthBin}; @@ -483,21 +483,11 @@ impl BamStatAccum { let seq_len = record.seq_len(); if seq_len > 0 { - // SAFETY: We access the raw BAM record data to compute CRC32 - // checksums matching samtools' approach. The pointer arithmetic - // replicates htslib's bam_get_seq() macro: - // data + l_qname + (n_cigar << 2) - // The seq_len > 0 guard above ensures sequence data exists. 
- // The slice length seq_len.div_ceil(2) matches the BAM spec's - // 4-bit encoded sequence format: (seq_len+1)/2 bytes. - let seq_bytes = unsafe { - let inner = record.inner(); - let data = inner.data; - let seq_offset = - inner.core.l_qname as isize + ((inner.core.n_cigar as isize) << 2); - let seq_nbytes = seq_len.div_ceil(2); - std::slice::from_raw_parts(data.offset(seq_offset), seq_nbytes) - }; + let seq_len = seq_len as usize; + let seq = record.seq(); + let seq_bytes = seq.encoded_bytes(); + let seq_nbytes = seq_len.div_ceil(2); + let seq_bytes = &seq_bytes[..seq_nbytes.min(seq_bytes.len())]; let seq_crc = crc32fast::hash(seq_bytes); self.chk[1] = self.chk[1].wrapping_add(seq_crc); @@ -569,21 +559,21 @@ impl BamStatAccum { // separate full CIGAR traversal here. // NM tag (edit distance) - if let Ok(rust_htslib::bam::record::Aux::U8(nm)) = record.aux(b"NM") { + if let Ok(bam::record::Aux::U8(nm)) = record.aux(b"NM") { self.mismatches += u64::from(nm); - } else if let Ok(rust_htslib::bam::record::Aux::U16(nm)) = record.aux(b"NM") { + } else if let Ok(bam::record::Aux::U16(nm)) = record.aux(b"NM") { self.mismatches += u64::from(nm); - } else if let Ok(rust_htslib::bam::record::Aux::U32(nm)) = record.aux(b"NM") { + } else if let Ok(bam::record::Aux::U32(nm)) = record.aux(b"NM") { self.mismatches += u64::from(nm); - } else if let Ok(rust_htslib::bam::record::Aux::I8(nm)) = record.aux(b"NM") { + } else if let Ok(bam::record::Aux::I8(nm)) = record.aux(b"NM") { if nm > 0 { self.mismatches += nm as u64; } - } else if let Ok(rust_htslib::bam::record::Aux::I16(nm)) = record.aux(b"NM") { + } else if let Ok(bam::record::Aux::I16(nm)) = record.aux(b"NM") { if nm > 0 { self.mismatches += nm as u64; } - } else if let Ok(rust_htslib::bam::record::Aux::I32(nm)) = record.aux(b"NM") { + } else if let Ok(bam::record::Aux::I32(nm)) = record.aux(b"NM") { if nm > 0 { self.mismatches += nm as u64; } @@ -640,7 +630,7 @@ impl BamStatAccum { // Cap at MAX_INSERT_SIZE (8000), matching 
// samtools stats which accumulates overflow // into the cap bucket. - let capped = abs_tlen.min(8000); + let capped = u64::from(abs_tlen.min(8000)); let entry = self.is_hist.entry(capped).or_insert([0; 4]); entry[0] += 1; // total entry[orientation_idx] += 1; @@ -824,7 +814,7 @@ impl BamStatAccum { // Buffer grown to max_read_len * 5 as needed. // ============================================================= if is_mapped && !is_secondary { - use rust_htslib::bam::record::Cigar as C; + use crate::rna::bam::record::Cigar as C; let is_reverse = flags & BAM_FREVERSE != 0; let read_len = record.seq_len(); let tid = record.tid(); @@ -846,7 +836,7 @@ impl BamStatAccum { // When growing, linearise the circular data just like // upstream samtools: copy [idx..old_size] then [0..idx] // into a fresh buffer, and reset idx to 0. - let need = read_len * 5; + let need = (read_len * 5) as usize; if need > self.cov_buf.len() { let old_size = self.cov_buf.len(); let mut new_buf = vec![0u32; need]; @@ -874,14 +864,14 @@ impl BamStatAccum { // Single CIGAR traversal serving IC/ID + bases_mapped_cigar + COV let cigar = record.cigar(); - let mut icycle: usize = 0; + let mut icycle: i32 = 0; let mut cigar_mapped: u64 = 0; let mut ref_pos = pos; for op in cigar.iter() { match op { C::Ins(n) => { - let ncig = *n as usize; + let ncig = *n as i32; let len = *n as u64; cigar_mapped += len; // I counts toward bases_mapped_cigar @@ -891,9 +881,9 @@ impl BamStatAccum { // IC: indels per cycle (read-oriented index) let idx = if is_reverse { - read_len.saturating_sub(icycle + ncig) + read_len.saturating_sub(icycle + ncig) as usize } else { - icycle + icycle as usize }; if idx >= self.ic.len() { self.ic.resize(idx + 1, [0u64; 4]); @@ -922,13 +912,13 @@ impl BamStatAccum { ref_pos += *n as i64; // still advance ref for COV continue; } - read_len.saturating_sub(icycle + 1) + read_len.saturating_sub(icycle + 1) as usize } else { if icycle == 0 { ref_pos += *n as i64; continue; } - icycle - 1 + 
(icycle - 1) as usize }; if idx >= self.ic.len() { self.ic.resize(idx + 1, [0u64; 4]); @@ -945,7 +935,7 @@ impl BamStatAccum { C::Match(n) | C::Equal(n) | C::Diff(n) => { let len = *n as u64; cigar_mapped += len; // M/=/X count toward bases_mapped_cigar - icycle += *n as usize; + icycle += *n as i32; // COV: M/=/X consumes reference positions if do_cov { let end = ref_pos + *n as i64; @@ -959,8 +949,8 @@ impl BamStatAccum { ref_pos += *n as i64; // N advances ref (COV skips it) } C::SoftClip(n) => { - icycle += *n as usize; // S advances query cycle - // COV: S consumes no reference positions + icycle += *n as i32; // S advances query cycle + // COV: S consumes no reference positions } C::HardClip(_) | C::Pad(_) => {} } @@ -1011,7 +1001,7 @@ impl BamStatAccum { let seq = record.seq(); let mut count: u32 = 0; for i in 0..seq_len { - let base = seq.encoded_base(i); + let base = seq.encoded_base(i as usize); if base == 2 || base == 4 { count += 1; } @@ -1076,7 +1066,7 @@ impl BamStatAccum { let has_splice = record .cigar() .iter() - .any(|op| matches!(op, rust_htslib::bam::record::Cigar::RefSkip(_))); + .any(|op| matches!(op, crate::rna::bam::record::Cigar::RefSkip(_))); if has_splice { self.splice += 1; } else { @@ -1370,7 +1360,7 @@ impl InferExpAccum { .cigar() .iter() .filter_map(|op| { - use rust_htslib::bam::record::Cigar::*; + use crate::rna::bam::record::Cigar::*; match op { Match(len) | Ins(len) | Equal(len) | Diff(len) => Some(*len as u64), _ => None, @@ -1504,7 +1494,7 @@ fn hash_sequence_encoded(seq: &bam::record::Seq<'_>) -> u128 { /// Hash position key matching RSeQC's `fetch_exon` + position key logic. /// Uses FNV-1a hashing to avoid string allocation per read. 
fn hash_position_key(chrom: &str, pos: i64, cigar: &bam::record::CigarStringView) -> u64 { - use rust_htslib::bam::record::Cigar; + use crate::rna::bam::record::Cigar; let mut h = crate::io::FNV1A_OFFSET; crate::io::fnv1a_update(&mut h, chrom.as_bytes()); @@ -2061,7 +2051,7 @@ fn compute_qalen_and_intron_size(record: &bam::Record) -> (u64, u64) { let mut qalen: u64 = 0; let mut intron_size: u64 = 0; for op in record.cigar().iter() { - use rust_htslib::bam::record::Cigar::*; + use crate::rna::bam::record::Cigar::*; match op { Match(len) | Equal(len) | Diff(len) => qalen += *len as u64, Ins(len) => qalen += *len as u64, @@ -2081,7 +2071,7 @@ fn fetch_exon_blocks_rseqc(record: &bam::Record) -> Vec<(u64, u64)> { let mut chrom_st = record.pos() as u64; for op in record.cigar().iter() { - use rust_htslib::bam::record::Cigar::*; + use crate::rna::bam::record::Cigar::*; match op { Match(len) => { let start = chrom_st; diff --git a/src/rna/rseqc/bam_stat.rs b/src/rna/rseqc/bam_stat.rs index 2cd06ac..afa6431 100644 --- a/src/rna/rseqc/bam_stat.rs +++ b/src/rna/rseqc/bam_stat.rs @@ -326,8 +326,8 @@ pub fn write_bam_stat(result: &BamStatResult, output_path: &Path) -> Result<()> #[cfg(test)] mod tests { + use crate::rna::bam::{self, Read as BamRead}; use crate::rna::rseqc::accumulators::BamStatAccum; - use rust_htslib::bam::{self, Read as BamRead}; #[test] fn test_bam_stat_small() { diff --git a/src/rna/rseqc/common.rs b/src/rna/rseqc/common.rs index 92896ef..26c4187 100644 --- a/src/rna/rseqc/common.rs +++ b/src/rna/rseqc/common.rs @@ -27,8 +27,8 @@ use crate::gtf::Gene; /// /// # Returns /// Vector of `(intron_start, intron_end)` tuples (0-based coordinates). 
-pub fn fetch_introns(start_pos: u64, cigar: &[rust_htslib::bam::record::Cigar]) -> Vec<(u64, u64)> { - use rust_htslib::bam::record::Cigar::*; +pub fn fetch_introns(start_pos: u64, cigar: &[crate::rna::bam::record::Cigar]) -> Vec<(u64, u64)> { + use crate::rna::bam::record::Cigar::*; let mut pos = start_pos; let mut introns = Vec::new(); @@ -202,7 +202,7 @@ mod tests { #[test] fn test_fetch_introns_simple() { - use rust_htslib::bam::record::Cigar::*; + use crate::rna::bam::record::Cigar::*; // 50M500N50M — one intron at position 100+50=150 to 150+500=650 let cigar = vec![Match(50), RefSkip(500), Match(50)]; let introns = fetch_introns(100, &cigar); @@ -212,7 +212,7 @@ mod tests { #[test] fn test_fetch_introns_multiple() { - use rust_htslib::bam::record::Cigar::*; + use crate::rna::bam::record::Cigar::*; // 10M500N20M300N10M — two introns let cigar = vec![Match(10), RefSkip(500), Match(20), RefSkip(300), Match(10)]; let introns = fetch_introns(100, &cigar); @@ -223,7 +223,7 @@ mod tests { #[test] fn test_fetch_introns_with_deletions() { - use rust_htslib::bam::record::Cigar::*; + use crate::rna::bam::record::Cigar::*; // 10M5D10M500N10M let cigar = vec![Match(10), Del(5), Match(10), RefSkip(500), Match(10)]; let introns = fetch_introns(100, &cigar); @@ -233,7 +233,7 @@ mod tests { #[test] fn test_fetch_introns_no_introns() { - use rust_htslib::bam::record::Cigar::*; + use crate::rna::bam::record::Cigar::*; let cigar = vec![Match(100)]; let introns = fetch_introns(100, &cigar); assert!(introns.is_empty()); @@ -241,7 +241,7 @@ mod tests { #[test] fn test_fetch_introns_soft_clip_no_advance() { - use rust_htslib::bam::record::Cigar::*; + use crate::rna::bam::record::Cigar::*; // 5S50M500N50M — soft clip should NOT advance position let cigar = vec![SoftClip(5), Match(50), RefSkip(500), Match(50)]; let introns = fetch_introns(100, &cigar); diff --git a/src/rna/rseqc/flagstat.rs b/src/rna/rseqc/flagstat.rs index cb1e370..24cded1 100644 --- a/src/rna/rseqc/flagstat.rs +++ 
b/src/rna/rseqc/flagstat.rs @@ -158,8 +158,8 @@ pub fn write_flagstat(result: &BamStatResult, output_path: &Path) -> Result<()> #[cfg(test)] mod tests { use super::*; + use crate::rna::bam::{self, Read as BamRead}; use crate::rna::rseqc::accumulators::BamStatAccum; - use rust_htslib::bam::{self, Read as BamRead}; use std::io::Read; #[test] fn test_flagstat_format() { diff --git a/src/rna/rseqc/idxstats.rs b/src/rna/rseqc/idxstats.rs index 93c018f..de81e02 100644 --- a/src/rna/rseqc/idxstats.rs +++ b/src/rna/rseqc/idxstats.rs @@ -60,8 +60,8 @@ pub fn write_idxstats( #[cfg(test)] mod tests { use super::*; + use crate::rna::bam::{self, Read as BamRead}; use crate::rna::rseqc::accumulators::BamStatAccum; - use rust_htslib::bam::{self, Read as BamRead}; use std::io::Read; #[test] fn test_idxstats_format() { diff --git a/src/rna/rseqc/read_duplication.rs b/src/rna/rseqc/read_duplication.rs index 005b2ed..e8491d0 100644 --- a/src/rna/rseqc/read_duplication.rs +++ b/src/rna/rseqc/read_duplication.rs @@ -156,8 +156,8 @@ fn write_r_script( #[cfg(test)] mod tests { use super::*; + use crate::rna::bam::{self, Read as BamRead}; use log::debug; - use rust_htslib::bam::{self, Read as BamRead}; use std::collections::HashMap; use std::time::Instant; @@ -166,7 +166,7 @@ mod tests { /// Constructs `{chrom}:{start}:{exon1_start}-{exon1_end}:{exon2_start}-{exon2_end}:...` /// matching RSeQC's `fetch_exon` + position key logic. 
fn build_position_key(chrom: &str, pos: i64, cigar: &bam::record::CigarStringView) -> String { - use rust_htslib::bam::record::Cigar; + use crate::rna::bam::record::Cigar; let mut key = format!("{}:{}:", chrom, pos); let mut ref_pos = pos; @@ -218,8 +218,9 @@ mod tests { let mut pos_dup: HashMap = HashMap::new(); let mut total_processed = 0u64; - for result in bam.records() { - let record = result.context("Failed to read BAM record")?; + let mut record = bam::Record::new(); + while let Some(result) = bam.read(&mut record) { + result.context("Failed to read BAM record")?; if record.is_unmapped() || record.is_quality_check_failed() || record.mapq() < mapq_cut { @@ -274,8 +275,8 @@ mod tests { #[test] fn test_build_position_key_simple() { - use rust_htslib::bam::record::Cigar; - use rust_htslib::bam::record::{CigarString, CigarStringView}; + use crate::rna::bam::record::Cigar; + use crate::rna::bam::record::{CigarString, CigarStringView}; let cigar_ops = vec![Cigar::Match(50)]; let cigar_string = CigarString(cigar_ops); @@ -287,8 +288,8 @@ mod tests { #[test] fn test_build_position_key_spliced() { - use rust_htslib::bam::record::Cigar; - use rust_htslib::bam::record::{CigarString, CigarStringView}; + use crate::rna::bam::record::Cigar; + use crate::rna::bam::record::{CigarString, CigarStringView}; let cigar_ops = vec![Cigar::Match(10), Cigar::RefSkip(500), Cigar::Match(20)]; let cigar_string = CigarString(cigar_ops); diff --git a/src/rna/rseqc/stats.rs b/src/rna/rseqc/stats.rs index 20abf8c..a583e00 100644 --- a/src/rna/rseqc/stats.rs +++ b/src/rna/rseqc/stats.rs @@ -1006,8 +1006,8 @@ fn write_gc_depth( #[cfg(test)] mod tests { use super::*; + use crate::rna::bam::{self, Read as BamRead}; use crate::rna::rseqc::accumulators::BamStatAccum; - use rust_htslib::bam::{self, Read as BamRead}; use std::io::Read; #[test] fn test_stats_sn_format() { diff --git a/src/rna/rseqc/tin.rs b/src/rna/rseqc/tin.rs index 4ce2dda..f7ef41b 100644 --- a/src/rna/rseqc/tin.rs +++ 
b/src/rna/rseqc/tin.rs @@ -4,7 +4,7 @@ //! uniformity across sampled exonic positions. Reimplementation of //! RSeQC's `tin.py` tool. -use rust_htslib::bam; +use crate::rna::bam; use std::collections::{HashMap, HashSet}; use std::hash::{BuildHasher, Hasher}; use std::io::Write; @@ -638,7 +638,7 @@ fn compute_tin(coverage: &[u32], n_total_positions: usize) -> f64 { /// Fill `buf` with aligned blocks from the CIGAR, reusing the existing /// Vec capacity to avoid per-read heap allocation. fn fill_aligned_blocks(record: &bam::Record, buf: &mut Vec<(u64, u64)>) { - use rust_htslib::bam::record::Cigar; + use crate::rna::bam::record::Cigar; buf.clear(); let mut pos = record.pos() as u64; diff --git a/tests/integration_test.rs b/tests/integration_test.rs index c7ea3ac..45fc450 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -9,8 +9,8 @@ use std::path::PathBuf; use std::process::Command; use std::time::{SystemTime, UNIX_EPOCH}; -use rust_htslib::bam; -use rust_htslib::bam::header::HeaderRecord; +use rustqc::rna::bam; +use rustqc::rna::bam::header::{HeaderRecord, HeaderView}; /// Helper: get the path to the rustqc binary. /// @@ -129,11 +129,11 @@ fn write_bam_fixture(path: &Path, chroms: &[(&str, u64)], sam_lines: &[&str]) { header.push_record( HeaderRecord::new(b"SQ") .push_tag(b"SN", *chrom) - .push_tag(b"LN", *len as i64), + .push_tag(b"LN", len.to_string()), ); } - let header_view = bam::HeaderView::from_header(&header); + let header_view = HeaderView::from_header(&header); let mut writer = bam::Writer::from_path(path, &header, bam::Format::Bam).unwrap(); for line in sam_lines { let record = bam::Record::from_sam(&header_view, line.as_bytes()).unwrap();