|
| 1 | +/* |
| 2 | + * Licensed to the Apache Software Foundation (ASF) under one |
| 3 | + * or more contributor license agreements. See the NOTICE file |
| 4 | + * distributed with this work for additional information |
| 5 | + * regarding copyright ownership. The ASF licenses this file |
| 6 | + * to you under the Apache License, Version 2.0 (the |
| 7 | + * "License"); you may not use this file except in compliance |
| 8 | + * with the License. You may obtain a copy of the License at |
| 9 | + * |
| 10 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | + * |
| 12 | + * Unless required by applicable law or agreed to in writing, software |
| 13 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 14 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | + * See the License for the specific language governing permissions and |
| 16 | + * limitations under the License. |
| 17 | + */ |
| 18 | +package org.apache.hadoop.hbase.regionserver; |
| 19 | + |
| 20 | +import static org.junit.jupiter.api.Assertions.assertEquals; |
| 21 | +import static org.junit.jupiter.api.Assertions.assertTrue; |
| 22 | + |
| 23 | +import java.io.IOException; |
| 24 | +import java.util.ArrayList; |
| 25 | +import java.util.List; |
| 26 | +import java.util.stream.Stream; |
| 27 | +import org.apache.hadoop.hbase.Cell; |
| 28 | +import org.apache.hadoop.hbase.CellUtil; |
| 29 | +import org.apache.hadoop.hbase.HBaseTestingUtil; |
| 30 | +import org.apache.hadoop.hbase.KeyValue; |
| 31 | +import org.apache.hadoop.hbase.TableName; |
| 32 | +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; |
| 33 | +import org.apache.hadoop.hbase.client.Put; |
| 34 | +import org.apache.hadoop.hbase.client.RegionInfo; |
| 35 | +import org.apache.hadoop.hbase.client.RegionInfoBuilder; |
| 36 | +import org.apache.hadoop.hbase.client.Scan; |
| 37 | +import org.apache.hadoop.hbase.client.TableDescriptor; |
| 38 | +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; |
| 39 | +import org.apache.hadoop.hbase.filter.FilterBase; |
| 40 | +import org.apache.hadoop.hbase.testclassification.MediumTests; |
| 41 | +import org.apache.hadoop.hbase.testclassification.RegionServerTests; |
| 42 | +import org.apache.hadoop.hbase.util.Bytes; |
| 43 | +import org.junit.jupiter.api.Tag; |
| 44 | +import org.junit.jupiter.params.ParameterizedTest; |
| 45 | +import org.junit.jupiter.params.provider.Arguments; |
| 46 | +import org.junit.jupiter.params.provider.MethodSource; |
| 47 | + |
| 48 | +/** |
| 49 | + * Test that SEEK_NEXT_USING_HINT with a hint pointing to a different column family does not degrade |
| 50 | + * into cell-by-cell traversal. |
| 51 | + * <p> |
| 52 | + * HBASE-28902: StoreScanner uses InnerStoreCellComparator which compares column families by length |
| 53 | + * only (an optimization valid within a single store). When the filter hint points to a different |
| 54 | + * CF, this comparison can produce the wrong result, causing the seek to be skipped and falling back |
| 55 | + * to heap.next() for every cell. |
| 56 | + */ |
| 57 | +@Tag(RegionServerTests.TAG) |
| 58 | +@Tag(MediumTests.TAG) |
| 59 | +public class TestCrossCFSeekHint { |
| 60 | + |
| 61 | + private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); |
| 62 | + |
| 63 | + private static final byte[] ROW = Bytes.toBytes("row"); |
| 64 | + private static final byte[] QUAL = Bytes.toBytes("q"); |
| 65 | + private static final int NUM_CELLS = 100; |
| 66 | + |
| 67 | + // Two CFs with different lengths: "aa" sorts first, "b" sorts second. |
| 68 | + private static final byte[] CF_FIRST = Bytes.toBytes("aa"); |
| 69 | + private static final byte[] CF_SECOND = Bytes.toBytes("b"); |
| 70 | + |
| 71 | + // Two CFs with same length: "aa" sorts first, "bb" sorts second. |
| 72 | + private static final byte[] CF_SECOND_SAME_LEN = Bytes.toBytes("bb"); |
| 73 | + |
| 74 | + static Stream<Arguments> params() { |
| 75 | + return Stream.of( |
| 76 | + // Forward scan: data in "aa" (first), hint to "b" (second). Different lengths. |
| 77 | + Arguments.of(CF_FIRST, CF_SECOND, true, false), |
| 78 | + Arguments.of(CF_FIRST, CF_SECOND, false, false), |
| 79 | + // Forward scan: data in "aa" (first), hint to "bb" (second). Same lengths. |
| 80 | + Arguments.of(CF_FIRST, CF_SECOND_SAME_LEN, true, false), |
| 81 | + Arguments.of(CF_FIRST, CF_SECOND_SAME_LEN, false, false), |
| 82 | + // Reverse scan: data in "b" (second), hint to "aa" (first). Different lengths. |
| 83 | + Arguments.of(CF_SECOND, CF_FIRST, true, true), Arguments.of(CF_SECOND, CF_FIRST, false, true), |
| 84 | + // Reverse scan: data in "bb" (second), hint to "aa" (first). Same lengths. |
| 85 | + Arguments.of(CF_SECOND_SAME_LEN, CF_FIRST, true, true), |
| 86 | + Arguments.of(CF_SECOND_SAME_LEN, CF_FIRST, false, true)); |
| 87 | + } |
| 88 | + |
| 89 | + /** |
| 90 | + * Filter that returns SEEK_NEXT_USING_HINT for cells not in the target family, with a hint |
| 91 | + * pointing to the target family. Counts how many times filterCell is called so we can verify the |
| 92 | + * scanner didn't traverse all cells. |
| 93 | + */ |
| 94 | + public static class CrossCFHintFilter extends FilterBase { |
| 95 | + private final byte[] targetFamily; |
| 96 | + private final byte[] targetQualifier; |
| 97 | + private long filterCellCount; |
| 98 | + |
| 99 | + public CrossCFHintFilter(byte[] targetFamily, byte[] targetQualifier) { |
| 100 | + this.targetFamily = targetFamily; |
| 101 | + this.targetQualifier = targetQualifier; |
| 102 | + } |
| 103 | + |
| 104 | + @Override |
| 105 | + public ReturnCode filterCell(Cell c) throws IOException { |
| 106 | + filterCellCount++; |
| 107 | + if (CellUtil.matchingFamily(c, targetFamily)) { |
| 108 | + return ReturnCode.INCLUDE; |
| 109 | + } |
| 110 | + return ReturnCode.SEEK_NEXT_USING_HINT; |
| 111 | + } |
| 112 | + |
| 113 | + @Override |
| 114 | + public Cell getNextCellHint(Cell currentCell) { |
| 115 | + return new KeyValue(CellUtil.cloneRow(currentCell), targetFamily, targetQualifier, |
| 116 | + Long.MAX_VALUE, KeyValue.Type.Maximum); |
| 117 | + } |
| 118 | + |
| 119 | + public long getFilterCellCount() { |
| 120 | + return filterCellCount; |
| 121 | + } |
| 122 | + } |
| 123 | + |
| 124 | + @ParameterizedTest |
| 125 | + @MethodSource("params") |
| 126 | + public void testCrossCFSeekHint(byte[] dataCF, byte[] targetCF, boolean flush, boolean reversed) |
| 127 | + throws IOException { |
| 128 | + String suffix = Bytes.toString(dataCF) + "_" + Bytes.toString(targetCF) |
| 129 | + + (flush ? "_flush" : "_mem") + (reversed ? "_rev" : ""); |
| 130 | + TableName tableName = TableName.valueOf("TestCrossCFSeekHint_" + suffix); |
| 131 | + TableDescriptor td = TableDescriptorBuilder.newBuilder(tableName) |
| 132 | + .setColumnFamily(ColumnFamilyDescriptorBuilder.of(dataCF)) |
| 133 | + .setColumnFamily(ColumnFamilyDescriptorBuilder.of(targetCF)).build(); |
| 134 | + RegionInfo info = RegionInfoBuilder.newBuilder(tableName).build(); |
| 135 | + HRegion region = HBaseTestingUtil.createRegionAndWAL(info, TEST_UTIL.getDataTestDir(), |
| 136 | + TEST_UTIL.getConfiguration(), td); |
| 137 | + |
| 138 | + try { |
| 139 | + for (int i = 0; i < NUM_CELLS; i++) { |
| 140 | + Put p = new Put(ROW); |
| 141 | + p.addColumn(dataCF, Bytes.toBytes(String.format("q%05d", i)), Bytes.toBytes("v")); |
| 142 | + region.put(p); |
| 143 | + } |
| 144 | + Put p = new Put(ROW); |
| 145 | + p.addColumn(targetCF, QUAL, Bytes.toBytes("target")); |
| 146 | + region.put(p); |
| 147 | + if (flush) { |
| 148 | + region.flush(true); |
| 149 | + } |
| 150 | + |
| 151 | + CrossCFHintFilter filter = new CrossCFHintFilter(targetCF, QUAL); |
| 152 | + Scan scan = new Scan(); |
| 153 | + scan.setFilter(filter); |
| 154 | + scan.setReversed(reversed); |
| 155 | + |
| 156 | + List<Cell> results = new ArrayList<>(); |
| 157 | + try (RegionScanner scanner = region.getScanner(scan)) { |
| 158 | + scanner.next(results); |
| 159 | + } |
| 160 | + |
| 161 | + assertEquals(1, results.size(), "Should return the cell from target CF"); |
| 162 | + assertTrue(CellUtil.matchingFamily(results.get(0), targetCF), |
| 163 | + "Result should be from target CF"); |
| 164 | + |
| 165 | + // 1 call for the first data CF cell (hint triggers close) + 1 for the target CF cell |
| 166 | + assertEquals(2, filter.getFilterCellCount(), |
| 167 | + "Cross-CF hint should not cause cell-by-cell traversal (HBASE-28902)"); |
| 168 | + } finally { |
| 169 | + HBaseTestingUtil.closeRegionAndWAL(region); |
| 170 | + } |
| 171 | + } |
| 172 | +} |
0 commit comments