Skip to content

Commit 91f9e2e

Browse files
committed
ALP: Add encoding benchmark for float and double throughput
Benchmark measuring ALP encode and decode throughput across 4 data patterns (decimal, integer, constant, mixed with specials) for both float and double types. Reports compression ratios at startup. Uses carrotsearch JUnit Benchmarks framework matching existing encoding benchmarks in parquet-column (delta, deltalengthbytearray).
1 parent 87b61a3 commit 91f9e2e

1 file changed

Lines changed: 321 additions & 0 deletions

File tree

Lines changed: 321 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,321 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.parquet.column.values.alp.benchmark;
20+
21+
import com.carrotsearch.junitbenchmarks.BenchmarkOptions;
22+
import com.carrotsearch.junitbenchmarks.BenchmarkRule;
23+
import com.carrotsearch.junitbenchmarks.annotation.AxisRange;
24+
import com.carrotsearch.junitbenchmarks.annotation.BenchmarkMethodChart;
25+
import java.io.IOException;
26+
import java.nio.ByteBuffer;
27+
import java.util.Random;
28+
import org.apache.parquet.bytes.ByteBufferInputStream;
29+
import org.apache.parquet.column.values.alp.AlpValuesReaderForDouble;
30+
import org.apache.parquet.column.values.alp.AlpValuesReaderForFloat;
31+
import org.apache.parquet.column.values.alp.AlpValuesWriter;
32+
import org.junit.BeforeClass;
33+
import org.junit.Rule;
34+
import org.junit.Test;
35+
36+
/**
37+
* Benchmark for ALP (Adaptive Lossless floating-Point) encoding.
38+
*
39+
* <p>Measures encode and decode throughput for float and double values across
40+
* multiple data patterns: decimal, integer, constant, and mixed with special
41+
* values. Also reports compressed size for compression ratio analysis.
42+
*
43+
* <p>Mirrors the C++ parquet-encoding-alp-benchmark for cross-language
44+
* performance comparison.
45+
*/
46+
@AxisRange(min = 0, max = 1)
47+
@BenchmarkMethodChart(filePrefix = "benchmark-alp-encoding")
48+
public class AlpEncodingBenchmark {
49+
50+
private static final int NUM_VALUES = 50_000; // matching C++ benchmark element count
51+
52+
@Rule
53+
public org.junit.rules.TestRule benchmarkRun = new BenchmarkRule();
54+
55+
// ========== Float data & compressed blobs ==========
56+
private static float[] floatDecimalData;
57+
private static float[] floatIntegerData;
58+
private static float[] floatConstantData;
59+
private static float[] floatMixedData;
60+
61+
private static byte[] floatDecimalCompressed;
62+
private static byte[] floatIntegerCompressed;
63+
private static byte[] floatConstantCompressed;
64+
private static byte[] floatMixedCompressed;
65+
66+
// ========== Double data & compressed blobs ==========
67+
private static double[] doubleDecimalData;
68+
private static double[] doubleIntegerData;
69+
private static double[] doubleConstantData;
70+
private static double[] doubleMixedData;
71+
72+
private static byte[] doubleDecimalCompressed;
73+
private static byte[] doubleIntegerCompressed;
74+
private static byte[] doubleConstantCompressed;
75+
private static byte[] doubleMixedCompressed;
76+
77+
@BeforeClass
78+
public static void prepare() throws IOException {
79+
Random rng = new Random(42);
80+
81+
// --- Float datasets ---
82+
floatDecimalData = new float[NUM_VALUES];
83+
for (int i = 0; i < NUM_VALUES; i++) {
84+
floatDecimalData[i] = Math.round(rng.nextFloat() * 10000) / 100.0f;
85+
}
86+
87+
floatIntegerData = new float[NUM_VALUES];
88+
for (int i = 0; i < NUM_VALUES; i++) {
89+
floatIntegerData[i] = (float) (rng.nextInt(100000));
90+
}
91+
92+
floatConstantData = new float[NUM_VALUES];
93+
for (int i = 0; i < NUM_VALUES; i++) {
94+
floatConstantData[i] = 3.14f;
95+
}
96+
97+
floatMixedData = new float[NUM_VALUES];
98+
for (int i = 0; i < NUM_VALUES; i++) {
99+
floatMixedData[i] = Math.round(rng.nextFloat() * 10000) / 100.0f;
100+
}
101+
// Inject ~2% special values
102+
for (int i = 0; i < NUM_VALUES; i += 50) {
103+
switch (i % 200) {
104+
case 0:
105+
floatMixedData[i] = Float.NaN;
106+
break;
107+
case 50:
108+
floatMixedData[i] = Float.POSITIVE_INFINITY;
109+
break;
110+
case 100:
111+
floatMixedData[i] = Float.NEGATIVE_INFINITY;
112+
break;
113+
case 150:
114+
floatMixedData[i] = -0.0f;
115+
break;
116+
}
117+
}
118+
119+
// --- Double datasets ---
120+
doubleDecimalData = new double[NUM_VALUES];
121+
for (int i = 0; i < NUM_VALUES; i++) {
122+
doubleDecimalData[i] = Math.round(rng.nextDouble() * 10000) / 100.0;
123+
}
124+
125+
doubleIntegerData = new double[NUM_VALUES];
126+
for (int i = 0; i < NUM_VALUES; i++) {
127+
doubleIntegerData[i] = (double) (rng.nextInt(100000));
128+
}
129+
130+
doubleConstantData = new double[NUM_VALUES];
131+
for (int i = 0; i < NUM_VALUES; i++) {
132+
doubleConstantData[i] = 3.14;
133+
}
134+
135+
doubleMixedData = new double[NUM_VALUES];
136+
for (int i = 0; i < NUM_VALUES; i++) {
137+
doubleMixedData[i] = Math.round(rng.nextDouble() * 10000) / 100.0;
138+
}
139+
for (int i = 0; i < NUM_VALUES; i += 50) {
140+
switch (i % 200) {
141+
case 0:
142+
doubleMixedData[i] = Double.NaN;
143+
break;
144+
case 50:
145+
doubleMixedData[i] = Double.POSITIVE_INFINITY;
146+
break;
147+
case 100:
148+
doubleMixedData[i] = Double.NEGATIVE_INFINITY;
149+
break;
150+
case 150:
151+
doubleMixedData[i] = -0.0;
152+
break;
153+
}
154+
}
155+
156+
// --- Pre-compress all datasets ---
157+
floatDecimalCompressed = compressFloats(floatDecimalData);
158+
floatIntegerCompressed = compressFloats(floatIntegerData);
159+
floatConstantCompressed = compressFloats(floatConstantData);
160+
floatMixedCompressed = compressFloats(floatMixedData);
161+
162+
doubleDecimalCompressed = compressDoubles(doubleDecimalData);
163+
doubleIntegerCompressed = compressDoubles(doubleIntegerData);
164+
doubleConstantCompressed = compressDoubles(doubleConstantData);
165+
doubleMixedCompressed = compressDoubles(doubleMixedData);
166+
167+
// --- Print compression ratios ---
168+
System.out.println("=== ALP Compression Ratios ===");
169+
printRatio("Float decimal", floatDecimalCompressed.length, NUM_VALUES * 4);
170+
printRatio("Float integer", floatIntegerCompressed.length, NUM_VALUES * 4);
171+
printRatio("Float constant", floatConstantCompressed.length, NUM_VALUES * 4);
172+
printRatio("Float mixed", floatMixedCompressed.length, NUM_VALUES * 4);
173+
printRatio("Double decimal", doubleDecimalCompressed.length, NUM_VALUES * 8);
174+
printRatio("Double integer", doubleIntegerCompressed.length, NUM_VALUES * 8);
175+
printRatio("Double constant", doubleConstantCompressed.length, NUM_VALUES * 8);
176+
printRatio("Double mixed", doubleMixedCompressed.length, NUM_VALUES * 8);
177+
}
178+
179+
private static void printRatio(String label, int compressedSize, int rawSize) {
180+
double ratio = 100.0 * compressedSize / rawSize;
181+
System.out.printf(" %-20s: %6d / %6d bytes = %5.1f%%%n", label, compressedSize, rawSize, ratio);
182+
}
183+
184+
// ========== Float encode benchmarks ==========
185+
186+
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 10)
187+
@Test
188+
public void encodeFloatDecimal() throws IOException {
189+
compressFloats(floatDecimalData);
190+
}
191+
192+
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 10)
193+
@Test
194+
public void encodeFloatInteger() throws IOException {
195+
compressFloats(floatIntegerData);
196+
}
197+
198+
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 10)
199+
@Test
200+
public void encodeFloatConstant() throws IOException {
201+
compressFloats(floatConstantData);
202+
}
203+
204+
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 10)
205+
@Test
206+
public void encodeFloatMixed() throws IOException {
207+
compressFloats(floatMixedData);
208+
}
209+
210+
// ========== Float decode benchmarks ==========
211+
212+
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 10)
213+
@Test
214+
public void decodeFloatDecimal() throws IOException {
215+
decompressFloats(floatDecimalCompressed, NUM_VALUES);
216+
}
217+
218+
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 10)
219+
@Test
220+
public void decodeFloatInteger() throws IOException {
221+
decompressFloats(floatIntegerCompressed, NUM_VALUES);
222+
}
223+
224+
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 10)
225+
@Test
226+
public void decodeFloatConstant() throws IOException {
227+
decompressFloats(floatConstantCompressed, NUM_VALUES);
228+
}
229+
230+
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 10)
231+
@Test
232+
public void decodeFloatMixed() throws IOException {
233+
decompressFloats(floatMixedCompressed, NUM_VALUES);
234+
}
235+
236+
// ========== Double encode benchmarks ==========
237+
238+
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 10)
239+
@Test
240+
public void encodeDoubleDecimal() throws IOException {
241+
compressDoubles(doubleDecimalData);
242+
}
243+
244+
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 10)
245+
@Test
246+
public void encodeDoubleInteger() throws IOException {
247+
compressDoubles(doubleIntegerData);
248+
}
249+
250+
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 10)
251+
@Test
252+
public void encodeDoubleConstant() throws IOException {
253+
compressDoubles(doubleConstantData);
254+
}
255+
256+
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 10)
257+
@Test
258+
public void encodeDoubleMixed() throws IOException {
259+
compressDoubles(doubleMixedData);
260+
}
261+
262+
// ========== Double decode benchmarks ==========
263+
264+
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 10)
265+
@Test
266+
public void decodeDoubleDecimal() throws IOException {
267+
decompressDoubles(doubleDecimalCompressed, NUM_VALUES);
268+
}
269+
270+
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 10)
271+
@Test
272+
public void decodeDoubleInteger() throws IOException {
273+
decompressDoubles(doubleIntegerCompressed, NUM_VALUES);
274+
}
275+
276+
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 10)
277+
@Test
278+
public void decodeDoubleConstant() throws IOException {
279+
decompressDoubles(doubleConstantCompressed, NUM_VALUES);
280+
}
281+
282+
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 10)
283+
@Test
284+
public void decodeDoubleMixed() throws IOException {
285+
decompressDoubles(doubleMixedCompressed, NUM_VALUES);
286+
}
287+
288+
// ========== Helpers ==========
289+
290+
private static byte[] compressFloats(float[] values) throws IOException {
291+
AlpValuesWriter.FloatAlpValuesWriter writer = new AlpValuesWriter.FloatAlpValuesWriter();
292+
for (float v : values) {
293+
writer.writeFloat(v);
294+
}
295+
return writer.getBytes().toByteArray();
296+
}
297+
298+
private static byte[] compressDoubles(double[] values) throws IOException {
299+
AlpValuesWriter.DoubleAlpValuesWriter writer = new AlpValuesWriter.DoubleAlpValuesWriter();
300+
for (double v : values) {
301+
writer.writeDouble(v);
302+
}
303+
return writer.getBytes().toByteArray();
304+
}
305+
306+
private static void decompressFloats(byte[] compressed, int numValues) throws IOException {
307+
AlpValuesReaderForFloat reader = new AlpValuesReaderForFloat();
308+
reader.initFromPage(numValues, ByteBufferInputStream.wrap(ByteBuffer.wrap(compressed)));
309+
for (int i = 0; i < numValues; i++) {
310+
reader.readFloat();
311+
}
312+
}
313+
314+
private static void decompressDoubles(byte[] compressed, int numValues) throws IOException {
315+
AlpValuesReaderForDouble reader = new AlpValuesReaderForDouble();
316+
reader.initFromPage(numValues, ByteBufferInputStream.wrap(ByteBuffer.wrap(compressed)));
317+
for (int i = 0; i < numValues; i++) {
318+
reader.readDouble();
319+
}
320+
}
321+
}

0 commit comments

Comments
 (0)