1818 */
1919package org .apache .parquet .column .values .alp ;
2020
21+ import org .apache .parquet .Preconditions ;
22+
2123/**
2224 * Constants for the ALP (Adaptive Lossless floating-Point) encoding.
2325 *
2426 * <p>ALP encoding converts floating-point values to integers using decimal scaling,
25- * then applies Frame of Reference (FOR) encoding and bit-packing.
27+ * then applies Frame of Reference encoding and bit-packing.
2628 * Values that cannot be losslessly converted are stored as exceptions.
2729 *
2830 * <p>Based on the paper: "ALP: Adaptive Lossless floating-Point Compression" (SIGMOD 2024)
@@ -43,61 +45,55 @@ private AlpConstants() {
4345 /** ALP compression mode identifier (0 = ALP) */
4446 public static final int ALP_COMPRESSION_MODE = 0 ;
4547
46- /** FOR encoding for integers (0 = FOR ) */
48+ /** Integer encoding identifier for Frame of Reference (FOR) encoding (0 = FOR) */
4749 public static final int ALP_INTEGER_ENCODING_FOR = 0 ;
4850
4951 /** Size of the ALP page header in bytes */
5052 public static final int ALP_HEADER_SIZE = 8 ;
5153
52- // ========== Vector Constants ==========
54+ // ========== Vector Size Constants ==========
5355
5456 /** Default number of elements per compressed vector (2^10 = 1024) */
55- public static final int ALP_VECTOR_SIZE = 1024 ;
57+ public static final int DEFAULT_VECTOR_SIZE = 1024 ;
5658
5759 /** Log2 of the default vector size */
58- public static final int ALP_VECTOR_SIZE_LOG = 10 ;
60+ public static final int DEFAULT_VECTOR_SIZE_LOG = 10 ;
61+
62+ /** Maximum allowed log2 of vector size (2^16 = 65536 elements) */
63+ static final int MAX_LOG_VECTOR_SIZE = 16 ;
64+
65+ /** Minimum allowed log2 of vector size (2^3 = 8 elements) */
66+ static final int MIN_LOG_VECTOR_SIZE = 3 ;
5967
6068 // ========== Exponent/Factor Limits ==========
6169
6270 /** Maximum exponent for float encoding (10^10 ~ 10 billion) */
63- public static final int FLOAT_MAX_EXPONENT = 10 ;
71+ static final int FLOAT_MAX_EXPONENT = 10 ;
6472
6573 /** Maximum exponent for double encoding (10^18 ~ 1 quintillion) */
66- public static final int DOUBLE_MAX_EXPONENT = 18 ;
67-
68- /** Number of (exponent, factor) combinations for float: sum(1..11) = 66 */
69- public static final int FLOAT_COMBINATIONS = 66 ;
70-
71- /** Number of (exponent, factor) combinations for double: sum(1..19) = 190 */
72- public static final int DOUBLE_COMBINATIONS = 190 ;
74+ static final int DOUBLE_MAX_EXPONENT = 18 ;
7375
7476 // ========== Sampling Constants ==========
7577
76- /** Number of values sampled per vector */
77- public static final int SAMPLER_SAMPLES_PER_VECTOR = 256 ;
78-
79- /** Number of sample vectors per rowgroup */
80- public static final int SAMPLER_SAMPLE_VECTORS_PER_ROWGROUP = 8 ;
78+ /** Number of sample vectors used for preset caching */
79+ static final int SAMPLER_SAMPLE_VECTORS = 8 ;
8180
8281 /** Maximum (exponent, factor) combinations to keep in preset */
83- public static final int MAX_COMBINATIONS = 5 ;
84-
85- /** Stop sampling if this many consecutive combinations produce worse results */
86- public static final int EARLY_EXIT_THRESHOLD = 4 ;
82+ static final int MAX_PRESET_COMBINATIONS = 5 ;
8783
8884 // ========== Fast Rounding Magic Numbers ==========
8985
9086 /**
9187 * Magic number for fast float rounding using the floating-point trick.
9288 * Formula: 2^22 + 2^23 = 12,582,912
9389 */
94- public static final float MAGIC_FLOAT = 12_582_912.0f ;
90+ static final float MAGIC_FLOAT = 12_582_912.0f ;
9591
9692 /**
9793 * Magic number for fast double rounding using the floating-point trick.
9894 * Formula: 2^51 + 2^52 = 6,755,399,441,055,744
9995 */
100- public static final double MAGIC_DOUBLE = 6_755_399_441_055_744.0 ;
96+ static final double MAGIC_DOUBLE = 6_755_399_441_055_744.0 ;
10197
10298 // ========== Metadata Sizes ==========
10399
@@ -113,24 +109,43 @@ private AlpConstants() {
113109 // ========== Precomputed Powers of 10 ==========
114110
115111 /** Precomputed powers of 10 for float encoding (10^0 to 10^10) */
116- public static final float [] FLOAT_POW10 = {1e0f , 1e1f , 1e2f , 1e3f , 1e4f , 1e5f , 1e6f , 1e7f , 1e8f , 1e9f , 1e10f };
112+ static final float [] FLOAT_POW10 = {1e0f , 1e1f , 1e2f , 1e3f , 1e4f , 1e5f , 1e6f , 1e7f , 1e8f , 1e9f , 1e10f };
117113
118114 /** Precomputed powers of 10 for double encoding (10^0 to 10^18) */
119- public static final double [] DOUBLE_POW10 = {
115+ static final double [] DOUBLE_POW10 = {
120116 1e0 , 1e1 , 1e2 , 1e3 , 1e4 , 1e5 , 1e6 , 1e7 , 1e8 , 1e9 , 1e10 , 1e11 , 1e12 , 1e13 , 1e14 , 1e15 , 1e16 , 1e17 , 1e18
121117 };
122118
123- /** Precomputed negative powers of 10 for decoding (10^0 to 10^-18) */
124- public static final double [] DOUBLE_POW10_NEG = {
125- 1e0 , 1e-1 , 1e-2 , 1e-3 , 1e-4 , 1e-5 , 1e-6 , 1e-7 , 1e-8 , 1e-9 , 1e-10 , 1e-11 , 1e-12 , 1e-13 , 1e-14 , 1e-15 , 1e-16 ,
126- 1e-17 , 1e-18
127- };
128-
129119 // ========== Bit Masks for Negative Zero Detection ==========
130120
131121 /** Bit pattern for negative zero in float */
132- public static final int FLOAT_NEGATIVE_ZERO_BITS = 0x80000000 ;
122+ static final int FLOAT_NEGATIVE_ZERO_BITS = 0x80000000 ;
133123
134124 /** Bit pattern for negative zero in double */
135- public static final long DOUBLE_NEGATIVE_ZERO_BITS = 0x8000000000000000L ;
125+ static final long DOUBLE_NEGATIVE_ZERO_BITS = 0x8000000000000000L ;
126+
127+ // ========== Validation ==========
128+
129+ /**
130+ * Validate that a vector size is a power of 2 and within the allowed range.
131+ *
132+ * @param vectorSize the vector size to validate
133+ * @return the validated vector size
134+ * @throws IllegalArgumentException if the vector size is invalid
135+ */
136+ static int validateVectorSize (int vectorSize ) {
137+ Preconditions .checkArgument (
138+ vectorSize > 0 && (vectorSize & (vectorSize - 1 )) == 0 ,
139+ "Vector size must be a power of 2, got: %s" ,
140+ vectorSize );
141+ int logSize = Integer .numberOfTrailingZeros (vectorSize );
142+ Preconditions .checkArgument (
143+ logSize >= MIN_LOG_VECTOR_SIZE && logSize <= MAX_LOG_VECTOR_SIZE ,
144+ "Vector size log2 must be between %s and %s, got: %s (vectorSize=%s)" ,
145+ MIN_LOG_VECTOR_SIZE ,
146+ MAX_LOG_VECTOR_SIZE ,
147+ logSize ,
148+ vectorSize );
149+ return vectorSize ;
150+ }
136151}
0 commit comments