diff --git a/CHANGELOG.md b/CHANGELOG.md index 23b1f33c..a4af8f6d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,17 @@ # Changelog +## 2026-06-25 + +### Candid 0.10.31 + +* Non-breaking changes: + + Encode and decode large `Nat`/`Int` values in linear time. Values beyond the `u64`/`i64` fast path were previously processed one LEB128/SLEB128 group at a time, shifting the whole bignum on every byte (O(n²) in the encoded length); they now build the value in a single O(n) pass. + +### didc 0.6.2 + +* Non-breaking changes: + + Upgrade candid_parser dependency to v0.4.0. + ## 2026-06-23 ### Candid 0.10.30 diff --git a/Cargo.lock b/Cargo.lock index a0e3ce9f..32489a8a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -209,13 +209,13 @@ dependencies = [ [[package]] name = "candid" -version = "0.10.30" +version = "0.10.31" dependencies = [ "anyhow", "bincode", "binread", "byteorder", - "candid_derive 0.10.30", + "candid_derive 0.10.31", "candid_parser 0.4.0", "hex", "ic_principal 0.1.3", @@ -247,7 +247,7 @@ dependencies = [ [[package]] name = "candid_derive" -version = "0.10.30" +version = "0.10.31" dependencies = [ "lazy_static", "proc-macro2 1.0.86", @@ -280,7 +280,7 @@ version = "0.4.0" dependencies = [ "anyhow", "arbitrary", - "candid 0.10.30", + "candid 0.10.31", "codespan-reporting", "console", "convert_case", @@ -479,7 +479,7 @@ dependencies = [ [[package]] name = "didc" -version = "0.6.1" +version = "0.6.2" dependencies = [ "anyhow", "candid_parser 0.4.0", diff --git a/rust/bench/Cargo.lock b/rust/bench/Cargo.lock index 97aab136..38f0d53d 100644 --- a/rust/bench/Cargo.lock +++ b/rust/bench/Cargo.lock @@ -156,7 +156,7 @@ dependencies = [ [[package]] name = "candid" -version = "0.10.30" +version = "0.10.31" dependencies = [ "anyhow", "binread", @@ -177,7 +177,7 @@ dependencies = [ [[package]] name = "candid_derive" -version = "0.10.30" +version = "0.10.31" dependencies = [ "lazy_static", "proc-macro2", diff --git a/rust/bench/bench.rs b/rust/bench/bench.rs index 
7dd94f41..18265de4 100644
--- a/rust/bench/bench.rs
+++ b/rust/bench/bench.rs
@@ -910,4 +910,64 @@ fn multi_arg() -> BenchResult {
     })
 }
 
+// Encoding and decoding of large `nat` / `int` bignums.
+// A LEB128 body of BIGNUM_LEN bytes is far beyond the u64/i64 fast path, so this
+// measures the bignum encode/decode routines on a large value. Their cost should
+// scale linearly with the encoded length.
+const BIGNUM_LEN: usize = 1 << 20; // 1 MiB encoded body
+
+// Build a single-argument Candid message: "DIDL", 0 type-table entries, 1 arg,
+// the primitive type opcode (nat = 0x7d, int = 0x7c), then the value body.
+fn single_nat_int_arg(type_opcode: u8, body: &[u8]) -> Vec<u8> {
+    let mut bytes = b"DIDL\x00\x01".to_vec();
+    bytes.push(type_opcode);
+    bytes.extend_from_slice(body);
+    bytes
+}
+
+#[bench(raw)]
+fn nat_bignum() -> BenchResult {
+    // `BIGNUM_LEN - 1` all-ones continuation groups then a 0x7f terminator =>
+    // the nat `2^(7*BIGNUM_LEN) - 1`, whose canonical encoding is exactly this body.
+    let mut body = vec![0xffu8; BIGNUM_LEN - 1];
+    body.push(0x7f);
+    let value = Decode!(&single_nat_int_arg(0x7d, &body), Nat).unwrap();
+
+    let mut config = DecoderConfig::new();
+    config.set_decoding_quota(COST).set_skipping_quota(SKIP);
+    bench_fn(|| {
+        let bytes = {
+            let _p = bench_scope("1. Encoding");
+            Encode!(&value).unwrap()
+        };
+        {
+            let _p = bench_scope("2. Decoding");
+            Decode!([config]; &bytes, Nat).unwrap();
+        }
+    })
+}
+
+#[bench(raw)]
+fn int_bignum() -> BenchResult {
+    // `BIGNUM_LEN - 1` all-ones groups then a 0x00 terminator => a large
+    // *positive* int (`2^(7*(BIGNUM_LEN-1)) - 1`); the trailing zero group keeps
+    // the sign bit clear so the value isn't sign-extended into a small negative.
+ let mut body = vec![0xffu8; BIGNUM_LEN - 1]; + body.push(0x00); + let value = Decode!(&single_nat_int_arg(0x7c, &body), Int).unwrap(); + + let mut config = DecoderConfig::new(); + config.set_decoding_quota(COST).set_skipping_quota(SKIP); + bench_fn(|| { + let bytes = { + let _p = bench_scope("1. Encoding"); + Encode!(&value).unwrap() + }; + { + let _p = bench_scope("2. Decoding"); + Decode!([config]; &bytes, Int).unwrap(); + } + }) +} + fn main() {} diff --git a/rust/candid/Cargo.toml b/rust/candid/Cargo.toml index 5852658e..583b646f 100644 --- a/rust/candid/Cargo.toml +++ b/rust/candid/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "candid" # sync with the version in `candid_derive/Cargo.toml` -version = "0.10.30" +version = "0.10.31" edition = "2021" rust-version.workspace = true authors = ["DFINITY Team"] @@ -16,7 +16,7 @@ keywords = ["internet-computer", "idl", "candid", "dfinity"] include = ["src", "Cargo.toml", "LICENSE", "README.md"] [dependencies] -candid_derive = { path = "../candid_derive", version = "=0.10.30" } +candid_derive = { path = "../candid_derive", version = "=0.10.31" } ic_principal = { path = "../ic_principal", version = "0.1.0" } binread = { version = "2.2", features = ["debug_template"] } byteorder = "1.5.0" diff --git a/rust/candid/src/types/number.rs b/rust/candid/src/types/number.rs index 64e3501d..845a3165 100644 --- a/rust/candid/src/types/number.rs +++ b/rust/candid/src/types/number.rs @@ -218,21 +218,16 @@ impl Nat { leb128::write::unsigned(w, value)?; return Ok(()); } - let zero = BigUint::from(0u8); - let mut value = self.0.clone(); - loop { - let big_byte = &value & BigUint::from(0x7fu8); - let mut byte = big_byte.to_u8().unwrap(); - value >>= 7; - if value != zero { - byte |= 0x80u8; - } - let buf = [byte]; - w.write_all(&buf)?; - if value == zero { - return Ok(()); - } + // Large value: emit the base-128 LEB128 groups in a single O(n) pass. 
+ // Radix 128 is a power of two, so `to_radix_le` bit-unpacks in O(n) + // instead of the O(n^2) shift-the-whole-bignum-by-7-each-byte loop. + let mut groups = self.0.to_radix_le(128); + let last = groups.len() - 1; + for byte in &mut groups[..last] { + *byte |= 0x80u8; // continuation bit on every group but the last } + w.write_all(&groups)?; + Ok(()) } pub fn decode(r: &mut R) -> crate::Result where @@ -254,23 +249,28 @@ impl Nat { continue; } - let mut result = BigUint::from(small); - result |= BigUint::from(low_bits) << shift; - if byte & 0x80u8 == 0 { - return Ok(Nat(result)); + // Value no longer fits in u64. Collect the remaining LEB128 groups + // and build the BigUint in a single linear pass. Each group is a + // base-128 digit (least significant first); radix 128 is a power of + // two, so `from_radix_le` bit-packs in O(n) instead of the O(n^2) + // repeated shifted-OR accumulation that grows the bignum each byte. + let digits_in_small = (shift / 7) as usize; + let mut groups: Vec = Vec::with_capacity(digits_in_small + 2); + for i in 0..digits_in_small { + groups.push(((small >> (7 * i)) & 0x7f) as u8); } - shift += 7; - loop { + groups.push(byte & 0x7f); + let mut cont = byte & 0x80u8 != 0; + while cont { let mut buf = [0]; r.read_exact(&mut buf)?; let byte = buf[0]; - let low_bits = BigUint::from(byte & 0x7fu8); - result |= low_bits << shift; - if byte & 0x80u8 == 0 { - return Ok(Nat(result)); - } - shift += 7; + groups.push(byte & 0x7f); + cont = byte & 0x80u8 != 0; } + let result = BigUint::from_radix_le(&groups, 128) + .expect("LEB128 groups are valid base-128 digits"); + return Ok(Nat(result)); } } } @@ -285,24 +285,48 @@ impl Int { leb128::write::signed(w, value)?; return Ok(()); } - let zero = BigInt::from(0); - let mut value = self.0.clone(); - loop { - let big_byte = &value & BigInt::from(0xff); - let mut byte = big_byte.to_u8().unwrap(); - value >>= 6; - let done = value == zero || value == BigInt::from(-1); - if done { - byte &= 0x7f; + // 
Large value: repack the minimal two's-complement little-endian bytes + // into 7-bit sleb128 groups in a single O(n) pass, instead of shifting + // the whole bignum by 7 on every byte (which is O(n^2)). + let bytes = self.0.to_signed_bytes_le(); + let sign_bit = bytes[bytes.len() - 1] >> 7; // 0 = non-negative, 1 = negative + let fill = if sign_bit == 1 { 0xffu8 } else { 0x00 }; + // Highest bit position that differs from the sign bit; every bit above + // it is pure sign extension, so a group cut above it can terminate. + let mut high_diff: isize = -1; + for i in (0..bytes.len()).rev() { + if bytes[i] != fill { + for bit in (0..8).rev() { + if (bytes[i] >> bit) & 1 != sign_bit { + high_diff = (i * 8 + bit) as isize; + break; + } + } + break; + } + } + let bit_at = |p: usize| -> u8 { + let idx = p / 8; + if idx < bytes.len() { + (bytes[idx] >> (p % 8)) & 1 } else { - value >>= 1; - byte |= 0x80; + sign_bit // sign-extend past the explicit bytes } - let buf = [byte]; - w.write_all(&buf)?; - if done { + }; + let mut shift = 0usize; + loop { + let mut group = 0u8; + for k in 0..7 { + group |= bit_at(shift + k) << k; + } + shift += 7; + // sleb128 terminates once the remaining bits are all sign bits and + // the group's own sign bit (0x40) already matches the value's sign. + if (shift as isize) > high_diff && (group >> 6) == sign_bit { + w.write_all(&[group & 0x7f])?; return Ok(()); } + w.write_all(&[group | 0x80])?; } } pub fn decode(r: &mut R) -> crate::Result @@ -346,30 +370,35 @@ impl Int { continue; } - let mut result = BigInt::from(small); - let big_low_bits = BigInt::from(byte & 0x7fu8); - result |= big_low_bits << shift; - shift += 7; - if byte & 0x80 == 0 { - if (byte & 0x40) != 0 { - result |= BigInt::from(-1) << shift; - } - return Ok(Int(result)); + // Value no longer fits in i64. Collect the remaining sleb128 groups + // and build the BigInt in a single linear pass. 
`small` holds the + // `shift/7` groups already consumed; none have been sign-extended + // yet (that only happens on the terminal byte of the i64 path), so + // each group can be recovered directly from its bits. + let digits_in_small = (shift / 7) as usize; + let mut groups: Vec = Vec::with_capacity(digits_in_small + 2); + for i in 0..digits_in_small { + groups.push(((small >> (7 * i)) & 0x7f) as u8); } - loop { + groups.push(byte & 0x7f); + let mut last = byte; + while last & 0x80 != 0 { let mut buf = [0]; r.read_exact(&mut buf)?; - let byte = buf[0]; - let big_low_bits = BigInt::from(byte & 0x7fu8); - result |= big_low_bits << shift; - shift += 7; - if byte & 0x80 == 0 { - if (byte & 0x40) != 0 { - result |= BigInt::from(-1) << shift; - } - return Ok(Int(result)); - } + last = buf[0]; + groups.push(last & 0x7f); + } + // base-128 magnitude; radix 128 is a power of two => O(n) bit-packing. + let mut result = BigInt::from( + BigUint::from_radix_le(&groups, 128) + .expect("sleb128 groups are valid base-128 digits"), + ); + if last & 0x40 != 0 { + // Sign bit set: reinterpret the magnitude as a two's-complement + // value by subtracting 2^(7 * number_of_groups). + result -= BigInt::from(1) << (7 * groups.len()); } + return Ok(Int(result)); } } } diff --git a/rust/candid/tests/number.rs b/rust/candid/tests/number.rs index 969ae398..6850868a 100644 --- a/rust/candid/tests/number.rs +++ b/rust/candid/tests/number.rs @@ -170,3 +170,146 @@ fn check(num: &str, int_hex: &str, nat_hex: &str) { assert_eq!(decoded, nat); } } + +/// A `len`-byte LEB128 body (`len >= 1`) whose 7-bit groups are all `0x7f`: +/// `len - 1` continuation bytes (`0xff`) followed by a terminator (`0x7f`). +/// Decoded as a `nat` this is `2^(7*len) - 1`; decoded as an `int` (sign bit set +/// on the final group) it is `-1`. Handy for exercising the bignum path with a +/// long value. 
+fn all_ones_leb128(len: usize) -> Vec<u8> {
+    let mut bytes = vec![0xffu8; len - 1];
+    bytes.push(0x7f);
+    bytes
+}
+
+/// Values straddling the `u64` fast-path boundary must all roundtrip, including
+/// the first one that overflows it and so takes the bignum path.
+#[test]
+fn nat_u64_boundary_roundtrip() {
+    for v in [
+        0u128,
+        u128::from(u64::MAX) - 1,
+        u128::from(u64::MAX),
+        u128::from(u64::MAX) + 1,
+        u128::MAX,
+    ] {
+        let n = Nat::from(v);
+        let mut enc = Vec::new();
+        n.encode(&mut enc).unwrap();
+        let decoded = Nat::decode(&mut &enc[..]).unwrap();
+        assert_eq!(decoded, n, "nat boundary value {v}");
+    }
+}
+
+/// Values straddling the positive and negative `i64` fast-path boundaries.
+#[test]
+fn int_i64_boundary_roundtrip() {
+    for v in [
+        0i128,
+        i128::from(i64::MAX),
+        i128::from(i64::MAX) + 1,
+        i128::from(i64::MIN),
+        i128::from(i64::MIN) - 1,
+        i128::MAX,
+        i128::MIN,
+    ] {
+        let i = Int::from(v);
+        let mut enc = Vec::new();
+        i.encode(&mut enc).unwrap();
+        let decoded = Int::decode(&mut &enc[..]).unwrap();
+        assert_eq!(decoded, i, "int boundary value {v}");
+    }
+}
+
+/// Large `nat` values exercise the bignum decode path at scale. `2^(7*len) - 1`
+/// is its own canonical LEB128 encoding (all groups are `0x7f`), so re-encoding
+/// the decoded value must reproduce the input byte-for-byte.
+#[test]
+fn nat_large_bignum_roundtrip() {
+    for len in [10usize, 11, 100, 1000, 50_000] {
+        let bytes = all_ones_leb128(len);
+        let n = Nat::decode(&mut &bytes[..]).unwrap();
+        let mut re = Vec::new();
+        n.encode(&mut re).unwrap();
+        assert_eq!(re, bytes, "nat len={len} canonical re-encode");
+        // Confirm the value really overflowed the u64 fast path.
+        assert!(n.0.to_u64().is_none(), "nat len={len} must exceed u64");
+    }
+}
+
+/// An all-`0xff`..`0x7f` sleb128 stream decodes to `-1` regardless of length:
+/// the sign bit is set and every magnitude bit is one. Exercises the bignum
+/// path's two's-complement sign handling for arbitrarily long inputs.
+#[test]
+fn int_all_continuation_is_minus_one() {
+    for len in [2usize, 10, 100, 1000, 50_000] {
+        let bytes = all_ones_leb128(len);
+        let i = Int::decode(&mut &bytes[..]).unwrap();
+        assert_eq!(
+            i,
+            Int::from(-1),
+            "all-0xff sleb128 (len={len}) decodes to -1"
+        );
+    }
+}
+
+/// `len - 1` zero groups (`0x80`) then a `0x7f` terminator decodes to
+/// `-2^(7*(len-1))`: a genuine large-magnitude negative bignum. That body is
+/// already minimal, so re-encoding must reproduce it byte-for-byte.
+#[test]
+fn int_large_negative_bignum_roundtrip() {
+    let zero = Int::from(0);
+    for len in [11usize, 100, 1000, 50_000] {
+        let mut bytes = vec![0x80u8; len - 1];
+        bytes.push(0x7f);
+        let i = Int::decode(&mut &bytes[..]).unwrap();
+        assert!(i < zero, "len={len} should be negative");
+        let mut re = Vec::new();
+        i.encode(&mut re).unwrap();
+        // Bit 6 of the last zero group is clear, so the 0x7f sign group is required.
+        assert_eq!(re, bytes, "neg bignum canonical re-encode len={len}");
+    }
+}
+
+/// Randomized roundtrip over large (beyond u64/i64) `nat`/`int` values, covering
+/// both signs. `decode` is independently pinned by the exact-byte and
+/// closed-form tests above, so `decode(encode(v)) == v` validates that `encode`
+/// is semantically correct. Re-encoding an equal value always yields the same
+/// bytes, so byte-stability under re-encode cannot by itself prove minimality.
+#[test]
+fn bignum_random_roundtrip() {
+    use rand::Rng;
+    let mut rng = rand::thread_rng();
+
+    let roundtrip_nat = |n: &Nat| {
+        let mut enc: Vec<u8> = Vec::new();
+        n.encode(&mut enc).unwrap();
+        let dec = Nat::decode(&mut &enc[..]).unwrap();
+        assert_eq!(&dec, n, "nat decode(encode(v)) == v");
+        // A minimal LEB128 body longer than one byte never ends in a zero group.
+        let padded = matches!(enc.as_slice(), [.., _, 0x00]);
+        assert!(!padded, "nat canonical (minimal) encoding");
+    };
+    let roundtrip_int = |i: &Int| {
+        let mut enc: Vec<u8> = Vec::new();
+        i.encode(&mut enc).unwrap();
+        let dec = Int::decode(&mut &enc[..]).unwrap();
+        assert_eq!(&dec, i, "int decode(encode(v)) == v");
+        // A minimal sleb128 body never ends in a group that only repeats the
+        // sign already carried by bit 6 of the group before it.
+        let padded = match enc.as_slice() {
+            [.., prev, 0x00] => *prev & 0x40 == 0,
+            [.., prev, 0x7f] => *prev & 0x40 != 0,
+            _ => false,
+        };
+        assert!(!padded, "int canonical (minimal) encoding");
+    };
+
+    for _ in 0..1000 {
+        // Grow well past u64/i64 (>= 2^150) so the bignum path is exercised.
+        let mut nat = Nat::from(rng.gen::<u64>());
+        let mut int = Int::from(rng.gen::<i64>());
+        for _ in 0..rng.gen_range(4..12) {
+            nat = nat * (1u64 << 50) + rng.gen::<u64>();
+            int = int * (1i64 << 50) + rng.gen::<i64>();
+        }
+        roundtrip_nat(&nat);
+        roundtrip_int(&int);
+        roundtrip_int(&(int * (-1i128))); // same magnitude, opposite sign
+    }
+}
diff --git a/rust/candid_derive/Cargo.toml b/rust/candid_derive/Cargo.toml
index c5a6d962..4f2f4ec5 100644
--- a/rust/candid_derive/Cargo.toml
+++ b/rust/candid_derive/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "candid_derive"
 # sync with the version in `candid/Cargo.toml`
-version = "0.10.30"
+version = "0.10.31"
 edition = "2021"
 rust-version.workspace = true
 authors = ["DFINITY Team"]
diff --git a/tools/didc/Cargo.toml b/tools/didc/Cargo.toml
index df3cd7ee..215aec01 100644
--- a/tools/didc/Cargo.toml
+++ b/tools/didc/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "didc"
-version = "0.6.1"
+version = "0.6.2"
 authors = ["DFINITY Team"]
 edition = "2021"
 description = "Candid command line tool"