Skip to content

Commit b087d8a

Browse files
committed
HBASE-28902 Fix cross-CF SEEK_NEXT_USING_HINT degrading to cell-by-cell traversal
StoreScanner uses InnerStoreCellComparator which compares column families by length only. When a filter returns SEEK_NEXT_USING_HINT with a hint targeting a different CF, this comparison produces wrong results, causing the seek to be skipped and falling back to heap.next() for every cell. Fix by using compareFamilies (a final method on CellComparatorImpl that does proper byte comparison) before the full cell comparison. When the hint targets a different CF that sorts ahead, close the store scanner immediately since it has no data for the hint family. We close the scanner rather than seeking because seekAsDirection delegates to KeyValueHeap.requestSeek which also uses InnerStoreCellComparator internally, so the seek would fail there too.
1 parent baaf6dd commit b087d8a

2 files changed

Lines changed: 186 additions & 1 deletion

File tree

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -787,10 +787,23 @@ public boolean next(List<? super ExtendedCell> outResult, ScannerContext scanner
787787
case SEEK_NEXT_USING_HINT:
788788
ExtendedCell nextKV = matcher.getNextKeyHint(cell);
789789
if (nextKV != null) {
790-
int difference = comparator.compare(nextKV, cell);
790+
// HBASE-28902: InnerStoreCellComparator only compares family lengths,
791+
// which breaks when the hint targets a different column family.
792+
// If the hint has a non-empty family, use compareFamilies (a final
793+
// method that does proper byte comparison) first. If it differs and
794+
// sorts ahead, close this store scanner since it has no data for the
795+
// hint family. Hints with empty family (e.g. from MultiRowRangeFilter)
796+
// are row-only seek hints and use the normal comparator path.
797+
int familyCmp =
798+
nextKV.getFamilyLength() > 0 ? comparator.compareFamilies(nextKV, cell) : 0;
799+
int difference = familyCmp != 0 ? familyCmp : comparator.compare(nextKV, cell);
791800
if (
792801
((!scan.isReversed() && difference > 0) || (scan.isReversed() && difference < 0))
793802
) {
803+
if (familyCmp != 0) {
804+
close(false);
805+
return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
806+
}
794807
seekAsDirection(nextKV);
795808
NextState stateAfterSeekByHint = needToReturn();
796809
if (stateAfterSeekByHint != null) {
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.hbase.regionserver;
19+
20+
import static org.junit.jupiter.api.Assertions.assertEquals;
21+
import static org.junit.jupiter.api.Assertions.assertTrue;
22+
23+
import java.io.IOException;
24+
import java.util.ArrayList;
25+
import java.util.List;
26+
import java.util.stream.Stream;
27+
import org.apache.hadoop.hbase.Cell;
28+
import org.apache.hadoop.hbase.CellUtil;
29+
import org.apache.hadoop.hbase.HBaseTestingUtil;
30+
import org.apache.hadoop.hbase.KeyValue;
31+
import org.apache.hadoop.hbase.TableName;
32+
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
33+
import org.apache.hadoop.hbase.client.Put;
34+
import org.apache.hadoop.hbase.client.RegionInfo;
35+
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
36+
import org.apache.hadoop.hbase.client.Scan;
37+
import org.apache.hadoop.hbase.client.TableDescriptor;
38+
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
39+
import org.apache.hadoop.hbase.filter.FilterBase;
40+
import org.apache.hadoop.hbase.testclassification.MediumTests;
41+
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
42+
import org.apache.hadoop.hbase.util.Bytes;
43+
import org.junit.jupiter.api.Tag;
44+
import org.junit.jupiter.params.ParameterizedTest;
45+
import org.junit.jupiter.params.provider.Arguments;
46+
import org.junit.jupiter.params.provider.MethodSource;
47+
48+
/**
49+
* Test that SEEK_NEXT_USING_HINT with a hint pointing to a different column family does not degrade
50+
* into cell-by-cell traversal.
51+
* <p>
52+
* HBASE-28902: StoreScanner uses InnerStoreCellComparator which compares column families by length
53+
* only (an optimization valid within a single store). When the filter hint points to a different
54+
* CF, this comparison can produce the wrong result, causing the seek to be skipped and falling back
55+
* to heap.next() for every cell.
56+
*/
57+
@Tag(RegionServerTests.TAG)
58+
@Tag(MediumTests.TAG)
59+
public class TestCrossCFSeekHint {
60+
61+
private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
62+
63+
private static final byte[] ROW = Bytes.toBytes("row");
64+
private static final byte[] QUAL = Bytes.toBytes("q");
65+
private static final int NUM_CELLS = 100;
66+
67+
// Two CFs with different lengths: "aa" sorts first, "b" sorts second.
68+
private static final byte[] CF_FIRST = Bytes.toBytes("aa");
69+
private static final byte[] CF_SECOND = Bytes.toBytes("b");
70+
71+
// Two CFs with same length: "aa" sorts first, "bb" sorts second.
72+
private static final byte[] CF_SECOND_SAME_LEN = Bytes.toBytes("bb");
73+
74+
static Stream<Arguments> params() {
75+
return Stream.of(
76+
// Forward scan: data in "aa" (first), hint to "b" (second). Different lengths.
77+
Arguments.of(CF_FIRST, CF_SECOND, true, false),
78+
Arguments.of(CF_FIRST, CF_SECOND, false, false),
79+
// Forward scan: data in "aa" (first), hint to "bb" (second). Same lengths.
80+
Arguments.of(CF_FIRST, CF_SECOND_SAME_LEN, true, false),
81+
Arguments.of(CF_FIRST, CF_SECOND_SAME_LEN, false, false),
82+
// Reverse scan: data in "b" (second), hint to "aa" (first). Different lengths.
83+
Arguments.of(CF_SECOND, CF_FIRST, true, true), Arguments.of(CF_SECOND, CF_FIRST, false, true),
84+
// Reverse scan: data in "bb" (second), hint to "aa" (first). Same lengths.
85+
Arguments.of(CF_SECOND_SAME_LEN, CF_FIRST, true, true),
86+
Arguments.of(CF_SECOND_SAME_LEN, CF_FIRST, false, true));
87+
}
88+
89+
/**
90+
* Filter that returns SEEK_NEXT_USING_HINT for cells not in the target family, with a hint
91+
* pointing to the target family. Counts how many times filterCell is called so we can verify the
92+
* scanner didn't traverse all cells.
93+
*/
94+
public static class CrossCFHintFilter extends FilterBase {
95+
private final byte[] targetFamily;
96+
private final byte[] targetQualifier;
97+
private long filterCellCount;
98+
99+
public CrossCFHintFilter(byte[] targetFamily, byte[] targetQualifier) {
100+
this.targetFamily = targetFamily;
101+
this.targetQualifier = targetQualifier;
102+
}
103+
104+
@Override
105+
public ReturnCode filterCell(Cell c) throws IOException {
106+
filterCellCount++;
107+
if (CellUtil.matchingFamily(c, targetFamily)) {
108+
return ReturnCode.INCLUDE;
109+
}
110+
return ReturnCode.SEEK_NEXT_USING_HINT;
111+
}
112+
113+
@Override
114+
public Cell getNextCellHint(Cell currentCell) {
115+
return new KeyValue(CellUtil.cloneRow(currentCell), targetFamily, targetQualifier,
116+
Long.MAX_VALUE, KeyValue.Type.Maximum);
117+
}
118+
119+
public long getFilterCellCount() {
120+
return filterCellCount;
121+
}
122+
}
123+
124+
@ParameterizedTest
125+
@MethodSource("params")
126+
public void testCrossCFSeekHint(byte[] dataCF, byte[] targetCF, boolean flush, boolean reversed)
127+
throws IOException {
128+
String suffix = Bytes.toString(dataCF) + "_" + Bytes.toString(targetCF)
129+
+ (flush ? "_flush" : "_mem") + (reversed ? "_rev" : "");
130+
TableName tableName = TableName.valueOf("TestCrossCFSeekHint_" + suffix);
131+
TableDescriptor td = TableDescriptorBuilder.newBuilder(tableName)
132+
.setColumnFamily(ColumnFamilyDescriptorBuilder.of(dataCF))
133+
.setColumnFamily(ColumnFamilyDescriptorBuilder.of(targetCF)).build();
134+
RegionInfo info = RegionInfoBuilder.newBuilder(tableName).build();
135+
HRegion region = HBaseTestingUtil.createRegionAndWAL(info, TEST_UTIL.getDataTestDir(),
136+
TEST_UTIL.getConfiguration(), td);
137+
138+
try {
139+
for (int i = 0; i < NUM_CELLS; i++) {
140+
Put p = new Put(ROW);
141+
p.addColumn(dataCF, Bytes.toBytes(String.format("q%05d", i)), Bytes.toBytes("v"));
142+
region.put(p);
143+
}
144+
Put p = new Put(ROW);
145+
p.addColumn(targetCF, QUAL, Bytes.toBytes("target"));
146+
region.put(p);
147+
if (flush) {
148+
region.flush(true);
149+
}
150+
151+
CrossCFHintFilter filter = new CrossCFHintFilter(targetCF, QUAL);
152+
Scan scan = new Scan();
153+
scan.setFilter(filter);
154+
scan.setReversed(reversed);
155+
156+
List<Cell> results = new ArrayList<>();
157+
try (RegionScanner scanner = region.getScanner(scan)) {
158+
scanner.next(results);
159+
}
160+
161+
assertEquals(1, results.size(), "Should return the cell from target CF");
162+
assertTrue(CellUtil.matchingFamily(results.get(0), targetCF),
163+
"Result should be from target CF");
164+
165+
// 1 call for the first data CF cell (hint triggers close) + 1 for the target CF cell
166+
assertEquals(2, filter.getFilterCellCount(),
167+
"Cross-CF hint should not cause cell-by-cell traversal (HBASE-28902)");
168+
} finally {
169+
HBaseTestingUtil.closeRegionAndWAL(region);
170+
}
171+
}
172+
}

0 commit comments

Comments
 (0)