Skip to content

Commit 78cb02d

Browse files
committed
ALP: Use unpack32Values for long bit unpacking
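Process 32 values at a time in unpackLongs instead of 8, reducing the number of unpack method calls by roughly 4x during double vector decompression. Values left over after the last full group of 32 are still unpacked in groups of 8, followed by a zero-padded tail of fewer than 8 values. This change is applied only to the long (double) path; int (float) unpacking is unchanged because the JIT produces better code with the original 8-value path.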
1 parent 8f4cb19 commit 78cb02d

1 file changed

Lines changed: 22 additions & 9 deletions

File tree

parquet-column/src/main/java/org/apache/parquet/column/values/alp/AlpCompression.java

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -477,21 +477,34 @@ static void packLongs(long[] values, int count, int bitWidth, byte[] output) {
477477
}
478478
}
479479

480-
@SuppressWarnings("deprecation")
481480
static void unpackLongs(byte[] packed, int count, int bitWidth, long[] output) {
482481
BytePackerForLong packer = Packer.LITTLE_ENDIAN.newBytePackerForLong(bitWidth);
483-
int fullGroups = count / 8;
484-
for (int g = 0; g < fullGroups; g++) {
485-
packer.unpack8Values(packed, g * bitWidth, output, g * 8);
482+
483+
// Process 32 values at a time (4x fewer calls than unpack8Values)
484+
int fullGroups32 = count / 32;
485+
for (int g = 0; g < fullGroups32; g++) {
486+
packer.unpack32Values(packed, g * bitWidth * 4, output, g * 32);
486487
}
487-
int remaining = count % 8;
488-
if (remaining > 0) {
488+
489+
// Process remaining 8 at a time
490+
int processed = fullGroups32 * 32;
491+
int byteOffset = fullGroups32 * bitWidth * 4;
492+
int remaining8 = (count - processed) / 8;
493+
for (int g = 0; g < remaining8; g++) {
494+
packer.unpack8Values(packed, byteOffset + g * bitWidth, output, processed + g * 8);
495+
}
496+
497+
// Handle tail (< 8 values)
498+
int tailStart = processed + remaining8 * 8;
499+
int tailCount = count - tailStart;
500+
if (tailCount > 0) {
501+
int tailByteOffset = byteOffset + remaining8 * bitWidth;
489502
byte[] tmp = new byte[bitWidth];
490-
int available = packed.length - fullGroups * bitWidth;
491-
System.arraycopy(packed, fullGroups * bitWidth, tmp, 0, Math.min(available, bitWidth));
503+
int available = packed.length - tailByteOffset;
504+
System.arraycopy(packed, tailByteOffset, tmp, 0, Math.min(available, bitWidth));
492505
long[] padded = new long[8];
493506
packer.unpack8Values(tmp, 0, padded, 0);
494-
System.arraycopy(padded, 0, output, fullGroups * 8, remaining);
507+
System.arraycopy(padded, 0, output, tailStart, tailCount);
495508
}
496509
}
497510
}

0 commit comments

Comments
 (0)