Skip to content

Commit 4c9b93b

Browse files
authored
HBASE-30036 Skip redundant delete markers during flush and minor compaction (#7993) (#8036)
Add DeleteTracker.isRedundantDelete() to detect when a delete marker is already covered by a previously tracked delete of equal or broader scope.

ScanDeleteTracker implements this for all four delete types:
- DeleteFamily/DeleteFamilyVersion: covered by a tracked DeleteFamily
- DeleteColumn/Delete: covered by a tracked DeleteFamily or DeleteColumn

MinorCompactionScanQueryMatcher calls this check before including a delete marker, returning SEEK_NEXT_COL to skip past all remaining cells covered by the previously tracked delete.

Compatible with KEEP_DELETED_CELLS. When set to TRUE, trackDelete() does not populate the delete tracker, so isRedundantDelete() always returns false and all markers are retained.

Signed-off-by: Charles Connell <cconnell@apache.org>
1 parent dc2dfb4 commit 4c9b93b

5 files changed

Lines changed: 175 additions & 2 deletions

File tree

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/DeleteTracker.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,20 @@ enum DeleteResult {
8383
// deleted in strong semantics of versions(See MvccTracker)
8484
}
8585

86+
/**
87+
* Check if the given delete marker is redundant, i.e., it is already covered by a previously
88+
* tracked delete of equal or broader scope. A DeleteFamily is redundant if a DeleteFamily with an
89+
* equal or higher timestamp was already seen. A DeleteColumn is redundant if a DeleteColumn for
90+
* the same qualifier, or a DeleteFamily, with an equal or higher timestamp was already seen.
91+
* <p>
92+
* This is a read-only check with no side effects on tracker state.
93+
* @param cell the delete marker cell to check
94+
* @return true if the delete marker is redundant and can be skipped; the default returns false
95+
*/
96+
default boolean isRedundantDelete(ExtendedCell cell) {
97+
return false;
98+
}
99+
86100
/**
87101
* Return the comparator passed to this delete tracker
88102
* @return the cell comparator

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/MinorCompactionScanQueryMatcher.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import java.io.IOException;
2121
import org.apache.hadoop.hbase.ExtendedCell;
22+
import org.apache.hadoop.hbase.KeyValue;
2223
import org.apache.hadoop.hbase.PrivateCellUtil;
2324
import org.apache.hadoop.hbase.regionserver.ScanInfo;
2425
import org.apache.yetus.audience.InterfaceAudience;
@@ -47,6 +48,19 @@ public MatchCode match(ExtendedCell cell) throws IOException {
4748
// we should not use this delete marker to mask any cell yet.
4849
return MatchCode.INCLUDE;
4950
}
51+
// Check before tracking: an older DeleteColumn or DeleteFamily is redundant if a newer
52+
// one of equal or broader scope was already seen. Must check before trackDelete() since
53+
// that overwrites tracker state. Seek past remaining cells for this column/row since
54+
// they are all covered by the previously tracked delete.
55+
if (deletes.isRedundantDelete(cell)) {
56+
// For a redundant delete with an empty qualifier, SKIP instead of seeking so that a later
57+
// DeleteFamily marker that covers other qualifiers is not passed over. DeleteFamily itself can
58+
// seek safely because all remaining empty-qualifier cells are redundant under it.
59+
if (cell.getQualifierLength() == 0 && typeByte != KeyValue.Type.DeleteFamily.getCode()) {
60+
return MatchCode.SKIP;
61+
}
62+
return columns.getNextRowOrNextColumn(cell);
63+
}
5064
trackDelete(cell);
5165
return MatchCode.INCLUDE;
5266
}

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/ScanDeleteTracker.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,28 @@ public DeleteResult isDeleted(ExtendedCell cell) {
142142
return DeleteResult.NOT_DELETED;
143143
}
144144

145+
/**
 * Reports whether the given delete marker is already covered by delete state held in this tracker.
 * <ul>
 * <li>DeleteFamily and DeleteFamilyVersion markers are redundant when a family stamp is recorded
 * and the marker's timestamp is not newer than it.</li>
 * <li>DeleteColumn and Delete markers are redundant under the same family condition, or when the
 * currently tracked delete is a DeleteColumn on the same qualifier whose timestamp is not older
 * than the marker's.</li>
 * <li>Any other cell type is never reported as redundant.</li>
 * </ul>
 * The method only reads tracker fields; it assigns none of them.
 * @param cell the delete marker cell to check
 * @return true if the marker is covered by a previously tracked delete
 */
@Override
146+
public boolean isRedundantDelete(ExtendedCell cell) {
147+
byte type = cell.getTypeByte();
// Equal timestamps count as covered (<=), not only strictly older ones.
148+
boolean coveredByFamily = hasFamilyStamp && cell.getTimestamp() <= familyStamp;
149+
150+
// Family-level markers can only be covered by a tracked DeleteFamily.
if (
151+
type == KeyValue.Type.DeleteFamily.getCode()
152+
|| type == KeyValue.Type.DeleteFamilyVersion.getCode()
153+
) {
154+
return coveredByFamily;
155+
}
156+
157+
// Only a tracked DeleteColumn (not a tracked version Delete) on the same qualifier counts here.
boolean coveredByColumn =
158+
deleteCell != null && deleteType == KeyValue.Type.DeleteColumn.getCode()
159+
&& CellUtil.matchingQualifier(cell, deleteCell) && cell.getTimestamp() <= deleteTimestamp;
160+
161+
// Column-level markers are covered by either the family-wide or the column-wide delete.
if (type == KeyValue.Type.DeleteColumn.getCode() || type == KeyValue.Type.Delete.getCode()) {
162+
return coveredByFamily || coveredByColumn;
163+
}
// Not one of the four delete marker types handled above.
164+
return false;
165+
}
166+
145167
@Override
146168
public boolean isEmpty() {
147169
return deleteCell == null && !hasFamilyStamp && familyVersionStamps.isEmpty();

hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,8 +176,15 @@ public void testCompactedFiles() throws Exception {
176176
stores[0].getStorefilesCount());
177177

178178
regions[1].compact(false);
179-
assertEquals(flushCount - stores[1].getCompactedFiles().size() + 2,
180-
stores[1].getStorefilesCount());
179+
// HBASE-30036 skips redundant delete markers during minor compaction, so the historical
180+
// file may end up empty and not be created. The count can be +1 or +2.
181+
int minorCompactedCount = stores[1].getStorefilesCount();
182+
int expectedMin = flushCount - stores[1].getCompactedFiles().size() + 1;
183+
int expectedMax = flushCount - stores[1].getCompactedFiles().size() + 2;
184+
assertTrue(
185+
"Expected store file count between " + expectedMin + " and " + expectedMax + " but was "
186+
+ minorCompactedCount,
187+
minorCompactedCount >= expectedMin && minorCompactedCount <= expectedMax);
181188

182189
verifyCells();
183190

hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestCompactionScanQueryMatcher.java

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
package org.apache.hadoop.hbase.regionserver.querymatcher;
1919

2020
import static org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher.MatchCode.INCLUDE;
21+
import static org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher.MatchCode.SEEK_NEXT_COL;
2122
import static org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher.MatchCode.SKIP;
2223
import static org.junit.Assert.assertEquals;
2324

@@ -74,6 +75,121 @@ public void testMatch_PartialRangeDropDeletes() throws Exception {
7475
testDropDeletes(row2, row3, new byte[][] { row1, row1 }, INCLUDE, INCLUDE);
7576
}
7677

78+
/**
79+
* Test redundant delete marker handling with COMPACT_RETAIN_DELETES. Cells are auto-generated
80+
* from the given types with decrementing timestamps.
81+
*/
82+
@Test
83+
public void testSkipsRedundantDeleteMarkers() throws IOException {
84+
// Interleaved DeleteColumn + Put. First DeleteColumn included, Put triggers SEEK_NEXT_COL.
// NOTE(review): only two codes are expected, so the helper stops before matching the trailing
// DeleteColumn; its match code is not asserted here.
85+
assertRetainDeletes(new Type[] { Type.DeleteColumn, Type.Put, Type.DeleteColumn }, INCLUDE,
86+
SEEK_NEXT_COL);
87+
88+
// Contiguous DeleteColumn. First included, rest redundant.
89+
assertRetainDeletes(new Type[] { Type.DeleteColumn, Type.DeleteColumn, Type.DeleteColumn },
90+
INCLUDE, SEEK_NEXT_COL, SEEK_NEXT_COL);
91+
92+
// Contiguous DeleteFamily. First included, rest redundant.
93+
assertRetainDeletes(new Type[] { Type.DeleteFamily, Type.DeleteFamily, Type.DeleteFamily },
94+
INCLUDE, SEEK_NEXT_COL, SEEK_NEXT_COL);
95+
96+
// DeleteFamily (DF) + DeleteFamilyVersion (DFV) interleaved. DF included, DFV redundant
// (SKIP because empty qualifier),
97+
// older DF redundant (SEEK_NEXT_COL), older DFV redundant (SKIP).
98+
assertRetainDeletes(new Type[] { Type.DeleteFamily, Type.DeleteFamilyVersion, Type.DeleteFamily,
99+
Type.DeleteFamilyVersion }, INCLUDE, SKIP, SEEK_NEXT_COL, SKIP);
100+
101+
// Delete (version) covered by DeleteColumn.
102+
assertRetainDeletes(new Type[] { Type.DeleteColumn, Type.Delete, Type.Delete, Type.Delete },
103+
INCLUDE, SEEK_NEXT_COL, SEEK_NEXT_COL, SEEK_NEXT_COL);
104+
105+
// KEEP_DELETED_CELLS=TRUE: all markers retained.
106+
assertRetainDeletes(KeepDeletedCells.TRUE,
107+
new Type[] { Type.DeleteColumn, Type.DeleteColumn, Type.DeleteColumn }, INCLUDE, INCLUDE,
108+
INCLUDE);
109+
}
110+
111+
/**
112+
* Redundant column-level deletes with empty qualifier must not seek past a subsequent
113+
* DeleteFamily. getKeyForNextColumn treats empty qualifier as "no column" and returns
114+
* SEEK_NEXT_ROW, which would skip the DF and all remaining cells in the row.
115+
*/
116+
@Test
117+
public void testEmptyQualifierDeleteDoesNotSkipDeleteFamily() throws IOException {
118+
byte[] emptyQualifier = HConstants.EMPTY_BYTE_ARRAY;
119+
120+
// DeleteColumn(empty) + redundant DeleteColumn(empty): the older DeleteFamily that follows
// must still be reachable (INCLUDE), so the redundant marker is SKIPped rather than seeked.
121+
assertRetainDeletes(emptyQualifier,
122+
new Type[] { Type.DeleteColumn, Type.DeleteColumn, Type.DeleteFamily }, INCLUDE, SKIP,
123+
INCLUDE);
124+
125+
// DeleteColumn(empty) + redundant Delete(empty): the DeleteFamily must still be reachable.
126+
assertRetainDeletes(emptyQualifier,
127+
new Type[] { Type.DeleteColumn, Type.Delete, Type.DeleteFamily }, INCLUDE, SKIP, INCLUDE);
128+
}
129+
130+
// Convenience overload: runs the check with KeepDeletedCells.FALSE.
private void assertRetainDeletes(Type[] types, MatchCode... expected) throws IOException {
131+
assertRetainDeletes(KeepDeletedCells.FALSE, types, expected);
132+
}
133+
134+
// Convenience overload: every cell uses the given qualifier, with KeepDeletedCells.FALSE.
private void assertRetainDeletes(byte[] qualifier, Type[] types, MatchCode... expected)
135+
throws IOException {
136+
assertRetainDeletes(KeepDeletedCells.FALSE, qualifier, types, expected);
137+
}
138+
139+
/**
140+
* Build cells from the given types with decrementing timestamps (the timestamp is kept for the
141+
* next cell when its type code is lower than the current one). Family-level types (DeleteFamily,
142+
* DeleteFamilyVersion) use empty qualifier; others use col1.
143+
*/
144+
private void assertRetainDeletes(KeepDeletedCells keepDeletedCells, Type[] types,
145+
MatchCode... expected) throws IOException {
// A null qualifier selects the per-type default qualifier in the delegate.
146+
assertRetainDeletes(keepDeletedCells, null, types, expected);
147+
}
148+
149+
/**
150+
* Build cells from the given types with decrementing timestamps. If qualifier is null,
151+
* family-level types use empty qualifier and others use col1. If qualifier is specified, all
152+
* types use that qualifier.
* <p>
* Cells are fed to a COMPACT_RETAIN_DELETES matcher in order, starting at the current time. The
* timestamp is decremented after each cell unless the next type has a lower type code than the
* current one. Matching stops once {@code expected.length} codes have been collected, so any
* further entries in {@code types} are not matched or asserted.
* @param keepDeletedCells KEEP_DELETED_CELLS setting passed to the ScanInfo
* @param qualifier qualifier for every cell, or null to choose one per type
* @param types cell types to generate, in scan order
* @param expected match codes expected for the leading cells
153+
*/
154+
private void assertRetainDeletes(KeepDeletedCells keepDeletedCells, byte[] qualifier,
155+
Type[] types, MatchCode... expected) throws IOException {
156+
long now = EnvironmentEdgeManager.currentTime();
157+
ScanInfo scanInfo = new ScanInfo(this.conf, fam1, 0, 1, ttl, keepDeletedCells,
158+
HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false);
159+
CompactionScanQueryMatcher qm =
160+
CompactionScanQueryMatcher.create(scanInfo, ScanType.COMPACT_RETAIN_DELETES, 0L,
161+
HConstants.OLDEST_TIMESTAMP, HConstants.OLDEST_TIMESTAMP, now, null, null, null);
162+
qm.setToNewRow(KeyValueUtil.createFirstOnRow(row1));
163+
164+
long ts = now;
165+
List<MatchCode> actual = new ArrayList<>(expected.length);
166+
for (int i = 0; i < types.length; i++) {
167+
byte[] qual;
168+
if (qualifier != null) {
169+
qual = qualifier;
170+
} else {
171+
boolean familyLevel = types[i] == Type.DeleteFamily || types[i] == Type.DeleteFamilyVersion;
172+
qual = familyLevel ? HConstants.EMPTY_BYTE_ARRAY : col1;
173+
}
// Only Put cells carry a value; delete markers are built without one.
174+
KeyValue kv = types[i] == Type.Put
175+
? new KeyValue(row1, fam1, qual, ts, types[i], data)
176+
: new KeyValue(row1, fam1, qual, ts, types[i]);
177+
actual.add(qm.match(kv));
// Stop as soon as every expected code has a result; trailing types are ignored.
178+
if (actual.size() >= expected.length) {
179+
break;
180+
}
181+
// Decrement ts for next cell, but keep same ts when the next type has lower type code
182+
// at the same logical position (e.g. DF then DFV at the same timestamp).
183+
if (i + 1 < types.length && types[i + 1].getCode() < types[i].getCode()) {
184+
continue;
185+
}
186+
ts--;
187+
}
// NOTE(review): if expected has more entries than types, actual.get(i) throws here rather than
// producing an assertion failure.
188+
for (int i = 0; i < expected.length; i++) {
189+
assertEquals("Mismatch at index " + i, expected[i], actual.get(i));
190+
}
191+
}
192+
77193
private void testDropDeletes(byte[] from, byte[] to, byte[][] rows, MatchCode... expected)
78194
throws IOException {
79195
long now = EnvironmentEdgeManager.currentTime();

0 commit comments

Comments
 (0)