apache
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java b/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java
Lines changed: 46 additions & 0 deletions
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/alp/AlpConstants.java b/parquet-column/src/main/java/org/apache/parquet/column/values/alp/AlpConstants.java
Lines changed: 49 additions & 34 deletions
@@ -50,6 +50,7 @@ public class ParquetProperties {
   public static final int DEFAULT_DICTIONARY_PAGE_SIZE = DEFAULT_PAGE_SIZE;
   public static final boolean DEFAULT_IS_DICTIONARY_ENABLED = true;
   public static final boolean DEFAULT_IS_BYTE_STREAM_SPLIT_ENABLED = false;
+  public static final boolean DEFAULT_IS_ALP_ENABLED = false;
   public static final WriterVersion DEFAULT_WRITER_VERSION = WriterVersion.PARQUET_1_0;
   public static final boolean DEFAULT_ESTIMATE_ROW_COUNT_FOR_PAGE_SIZE_CHECK = true;
   public static final int DEFAULT_MINIMUM_RECORD_COUNT_FOR_CHECK = 100;
@@ -132,6 +133,7 @@ public static WriterVersion fromString(String name) {
   private final int pageRowCountLimit;
   private final boolean pageWriteChecksumEnabled;
   private final ColumnProperty<ByteStreamSplitMode> byteStreamSplitEnabled;
+  private final ColumnProperty<Boolean> alpEnabled;
   private final Map<String, String> extraMetaData;
   private final ColumnProperty<Boolean> statistics;
   private final ColumnProperty<Boolean> sizeStatistics;
@@ -164,6 +166,7 @@ private ParquetProperties(Builder builder) {
     this.pageRowCountLimit = builder.pageRowCountLimit;
     this.pageWriteChecksumEnabled = builder.pageWriteChecksumEnabled;
     this.byteStreamSplitEnabled = builder.byteStreamSplitEnabled.build();
+    this.alpEnabled = builder.alpEnabled.build();
     this.extraMetaData = builder.extraMetaData;
     this.statistics = builder.statistics.build();
     this.sizeStatistics = builder.sizeStatistics.build();
@@ -259,6 +262,23 @@ public boolean isByteStreamSplitEnabled(ColumnDescriptor column) {
     }
   }
 
+  /**
+   * Check if ALP encoding is enabled for the given column.
+   * ALP encoding is only supported for FLOAT and DOUBLE types: for those two
+   * primitive types the result is whatever the {@code alpEnabled} column property
+   * yields for the column (configured through {@code Builder#withAlpEncoding});
+   * for every other primitive type this method returns {@code false} without
+   * consulting the property at all.
+   *
+   * @param column the column descriptor
+   * @return the configured ALP setting when the column is FLOAT or DOUBLE,
+   *         {@code false} for any other primitive type
+   */
+  public boolean isAlpEnabled(ColumnDescriptor column) {
+    switch (column.getPrimitiveType().getPrimitiveTypeName()) {
+      case FLOAT:
+      case DOUBLE:
+        return alpEnabled.getValue(column);
+      default:
+        return false;
+    }
+  }
+
   public ByteBufferAllocator getAllocator() {
     return allocator;
   }
@@ -416,6 +436,7 @@ public static class Builder {
     private int pageRowCountLimit = DEFAULT_PAGE_ROW_COUNT_LIMIT;
     private boolean pageWriteChecksumEnabled = DEFAULT_PAGE_WRITE_CHECKSUM_ENABLED;
     private final ColumnProperty.Builder<ByteStreamSplitMode> byteStreamSplitEnabled;
+    private final ColumnProperty.Builder<Boolean> alpEnabled;
     private Map<String, String> extraMetaData = new HashMap<>();
     private final ColumnProperty.Builder<Boolean> statistics;
     private final ColumnProperty.Builder<Boolean> sizeStatistics;
@@ -427,6 +448,7 @@ private Builder() {
               DEFAULT_IS_BYTE_STREAM_SPLIT_ENABLED
                   ? ByteStreamSplitMode.FLOATING_POINT
                   : ByteStreamSplitMode.NONE);
+      alpEnabled = ColumnProperty.<Boolean>builder().withDefaultValue(DEFAULT_IS_ALP_ENABLED);
       bloomFilterEnabled = ColumnProperty.<Boolean>builder().withDefaultValue(DEFAULT_BLOOM_FILTER_ENABLED);
       bloomFilterNDVs = ColumnProperty.<Long>builder().withDefaultValue(null);
       bloomFilterFPPs = ColumnProperty.<Double>builder().withDefaultValue(DEFAULT_BLOOM_FILTER_FPP);
@@ -457,6 +479,7 @@ private Builder(ParquetProperties toCopy) {
       this.numBloomFilterCandidates = ColumnProperty.builder(toCopy.numBloomFilterCandidates);
       this.maxBloomFilterBytes = toCopy.maxBloomFilterBytes;
       this.byteStreamSplitEnabled = ColumnProperty.builder(toCopy.byteStreamSplitEnabled);
+      this.alpEnabled = ColumnProperty.builder(toCopy.alpEnabled);
       this.extraMetaData = toCopy.extraMetaData;
       this.statistics = ColumnProperty.builder(toCopy.statistics);
       this.sizeStatistics = ColumnProperty.builder(toCopy.sizeStatistics);
@@ -534,6 +557,29 @@ public Builder withExtendedByteStreamSplitEncoding(boolean enable) {
       return this;
     }
 
+    /**
+     * Enable or disable ALP encoding for FLOAT and DOUBLE columns.
+     *
+     * <p>This sets the default value of the ALP column property; a builder created
+     * from scratch starts with {@code DEFAULT_IS_ALP_ENABLED}. Individual columns can
+     * be given their own value with {@link #withAlpEncoding(String, boolean)}.
+     * Columns of any other primitive type are still reported as not ALP-enabled by
+     * {@code ParquetProperties#isAlpEnabled}, regardless of this setting.
+     *
+     * @param enable whether ALP encoding should be enabled
+     * @return this builder for method chaining.
+     */
+    public Builder withAlpEncoding(boolean enable) {
+      this.alpEnabled.withDefaultValue(enable);
+      return this;
+    }
+
+    /**
+     * Enable or disable ALP encoding for the specified column.
+     *
+     * <p>Records a value for this one column path in the ALP column property,
+     * alongside the default set by {@link #withAlpEncoding(boolean)}.
+     * {@code ParquetProperties#isAlpEnabled} only consults the property for FLOAT
+     * and DOUBLE columns, so enabling ALP here for a column of another type does
+     * not make that method return {@code true}.
+     *
+     * @param columnPath the path of the column (dot-string)
+     * @param enable     whether ALP encoding should be enabled
+     * @return this builder for method chaining.
+     */
+    public Builder withAlpEncoding(String columnPath, boolean enable) {
+      this.alpEnabled.withValue(columnPath, enable);
+      return this;
+    }
+
     /**
      * Set the Parquet format dictionary page size.
      *
 
@@ -18,11 +18,13 @@
  */
 package org.apache.parquet.column.values.alp;
 
+import org.apache.parquet.Preconditions;
+
 /**
  * Constants for the ALP (Adaptive Lossless floating-Point) encoding.
  *
  * <p>ALP encoding converts floating-point values to integers using decimal scaling,
- * then applies Frame of Reference (FOR) encoding and bit-packing.
+ * then applies Frame of Reference encoding and bit-packing.
  * Values that cannot be losslessly converted are stored as exceptions.
  *
  * <p>Based on the paper: "ALP: Adaptive Lossless floating-Point Compression" (SIGMOD 2024)
@@ -43,61 +45,55 @@ private AlpConstants() {
   /** ALP compression mode identifier (0 = ALP) */
   public static final int ALP_COMPRESSION_MODE = 0;
 
-  /** FOR encoding for integers (0 = FOR) */
+  /** Frame of Reference encoding for integers (0 = Frame of Reference) */
   public static final int ALP_INTEGER_ENCODING_FOR = 0;
 
   /** Size of the ALP page header in bytes */
   public static final int ALP_HEADER_SIZE = 8;
 
-  // ========== Vector Constants ==========
+  // ========== Vector Size Constants ==========
 
   /** Default number of elements per compressed vector (2^10 = 1024) */
-  public static final int ALP_VECTOR_SIZE = 1024;
+  public static final int DEFAULT_VECTOR_SIZE = 1024;
 
   /** Log2 of the default vector size */
-  public static final int ALP_VECTOR_SIZE_LOG = 10;
+  public static final int DEFAULT_VECTOR_SIZE_LOG = 10;
+
+  /**
+   * Maximum allowed log2 of vector size, i.e. vector sizes up to 2^16 = 65536.
+   * Enforced as an inclusive upper bound by {@code validateVectorSize}.
+   */
+  static final int MAX_LOG_VECTOR_SIZE = 16;
+
+  /**
+   * Minimum allowed log2 of vector size, i.e. vector sizes down to 2^3 = 8.
+   * Enforced as an inclusive lower bound by {@code validateVectorSize}.
+   */
+  static final int MIN_LOG_VECTOR_SIZE = 3;
 
   // ========== Exponent/Factor Limits ==========
 
   /** Maximum exponent for float encoding (10^10 ~ 10 billion) */
-  public static final int FLOAT_MAX_EXPONENT = 10;
+  static final int FLOAT_MAX_EXPONENT = 10;
 
   /** Maximum exponent for double encoding (10^18 ~ 1 quintillion) */
-  public static final int DOUBLE_MAX_EXPONENT = 18;
-
-  /** Number of (exponent, factor) combinations for float: sum(1..11) = 66 */
-  public static final int FLOAT_COMBINATIONS = 66;
-
-  /** Number of (exponent, factor) combinations for double: sum(1..19) = 190 */
-  public static final int DOUBLE_COMBINATIONS = 190;
+  static final int DOUBLE_MAX_EXPONENT = 18;
 
   // ========== Sampling Constants ==========
 
-  /** Number of values sampled per vector */
-  public static final int SAMPLER_SAMPLES_PER_VECTOR = 256;
-
-  /** Number of sample vectors per rowgroup */
-  public static final int SAMPLER_SAMPLE_VECTORS_PER_ROWGROUP = 8;
+  /** Number of sample vectors used for preset caching */
+  static final int SAMPLER_SAMPLE_VECTORS = 8;
 
   /** Maximum (exponent, factor) combinations to keep in preset */
-  public static final int MAX_COMBINATIONS = 5;
-
-  /** Stop sampling if this many consecutive combinations produce worse results */
-  public static final int EARLY_EXIT_THRESHOLD = 4;
+  static final int MAX_PRESET_COMBINATIONS = 5;
 
   // ========== Fast Rounding Magic Numbers ==========
 
   /**
    * Magic number for fast float rounding using the floating-point trick.
    * Formula: 2^22 + 2^23 = 12,582,912
    */
-  public static final float MAGIC_FLOAT = 12_582_912.0f;
+  static final float MAGIC_FLOAT = 12_582_912.0f;
 
   /**
    * Magic number for fast double rounding using the floating-point trick.
    * Formula: 2^51 + 2^52 = 6,755,399,441,055,744
    */
-  public static final double MAGIC_DOUBLE = 6_755_399_441_055_744.0;
+  static final double MAGIC_DOUBLE = 6_755_399_441_055_744.0;
 
   // ========== Metadata Sizes ==========
 
@@ -113,24 +109,43 @@ private AlpConstants() {
   // ========== Precomputed Powers of 10 ==========
 
   /** Precomputed powers of 10 for float encoding (10^0 to 10^10) */
-  public static final float[] FLOAT_POW10 = {1e0f, 1e1f, 1e2f, 1e3f, 1e4f, 1e5f, 1e6f, 1e7f, 1e8f, 1e9f, 1e10f};
+  static final float[] FLOAT_POW10 = {1e0f, 1e1f, 1e2f, 1e3f, 1e4f, 1e5f, 1e6f, 1e7f, 1e8f, 1e9f, 1e10f};
 
   /** Precomputed powers of 10 for double encoding (10^0 to 10^18) */
-  public static final double[] DOUBLE_POW10 = {
+  static final double[] DOUBLE_POW10 = {
     1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18
   };
 
-  /** Precomputed negative powers of 10 for decoding (10^0 to 10^-18) */
-  public static final double[] DOUBLE_POW10_NEG = {
-    1e0, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9, 1e-10, 1e-11, 1e-12, 1e-13, 1e-14, 1e-15, 1e-16,
-    1e-17, 1e-18
-  };
-
   // ========== Bit Masks for Negative Zero Detection ==========
 
   /** Bit pattern for negative zero in float */
-  public static final int FLOAT_NEGATIVE_ZERO_BITS = 0x80000000;
+  static final int FLOAT_NEGATIVE_ZERO_BITS = 0x80000000;
 
   /** Bit pattern for negative zero in double */
-  public static final long DOUBLE_NEGATIVE_ZERO_BITS = 0x8000000000000000L;
+  static final long DOUBLE_NEGATIVE_ZERO_BITS = 0x8000000000000000L;
+
+  // ========== Validation ==========
+
+  /**
+   * Validate that a vector size is a power of 2 and within the allowed range.
+   *
+   * <p>Two checks run in order:
+   * <ol>
+   *   <li>The size must be positive and have exactly one bit set. A positive power
+   *       of two is the only value for which {@code v & (v - 1)} is zero, because
+   *       subtracting one flips the single set bit and every bit below it.</li>
+   *   <li>Its base-2 logarithm must lie between {@code MIN_LOG_VECTOR_SIZE} and
+   *       {@code MAX_LOG_VECTOR_SIZE}, both inclusive. Since the value is already
+   *       known to be a power of two, the number of trailing zero bits is exactly
+   *       that logarithm.</li>
+   * </ol>
+   *
+   * <p>The input is returned unchanged on success, so the call can be used inline
+   * in an assignment or argument list.
+   *
+   * @param vectorSize the vector size to validate
+   * @return the validated vector size
+   * @throws IllegalArgumentException if the vector size is invalid
+   */
+  static int validateVectorSize(int vectorSize) {
+    Preconditions.checkArgument(
+        vectorSize > 0 && (vectorSize & (vectorSize - 1)) == 0,
+        "Vector size must be a power of 2, got: %s",
+        vectorSize);
+    int logSize = Integer.numberOfTrailingZeros(vectorSize);
+    Preconditions.checkArgument(
+        logSize >= MIN_LOG_VECTOR_SIZE && logSize <= MAX_LOG_VECTOR_SIZE,
+        "Vector size log2 must be between %s and %s, got: %s (vectorSize=%s)",
+        MIN_LOG_VECTOR_SIZE,
+        MAX_LOG_VECTOR_SIZE,
+        logSize,
+        vectorSize);
+    return vectorSize;
+  }
 }