Skip to content

Commit a441fd3

Browse files
committed
WIP: handle outdated materialized views
1 parent a2960e8 commit a441fd3

7 files changed

Lines changed: 425 additions & 3 deletions

File tree

iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/Catalogs.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,7 @@ public static MaterializedView createMaterializedView(
358358
snapshot = icebergTable.currentSnapshot();
359359

360360
SourceState sourcestate = ImmutableSourceState.of(type, sourceTableName, sourceTableNamespace, catalogName,
361-
uuid, snapshot.snapshotId(), null, null);
361+
uuid, snapshot == null ? null : snapshot.snapshotId(), null, null);
362362
sourceStates.add(sourcestate);
363363
}
364364
case VIEW -> {

iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/BaseHiveIcebergMetaHook.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,9 @@ public void postGetTable(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
569569

570570
hmsTable.setViewOriginalText(mv.getView().properties().get(Catalogs.MATERIALIZED_VIEW_ORIGINAL_TEXT));
571571
hmsTable.setViewExpandedText(mv.getView().sqlFor("hive").sql());
572+
hmsTable.getCreationMetadata().setMaterializationTime(
573+
mv.getView().currentVersion().refreshState().refreshStartTimestampMs()
574+
);
572575
break;
573576

574577
default:
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
-- MV metadata is stored in Iceberg
2+
-- SORT_QUERY_RESULTS
3+
--! qt:replace:/(\s+'uuid'=')\S+('\s*)/$1#Masked#$2/
4+
--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
5+
--! qt:replace:/(.*snapshotId=)\S+(\}.*)/$1#SnapshotId#$2/
6+
7+
set hive.explain.user=false;
8+
set hive.support.concurrency=true;
9+
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
10+
set hive.iceberg.materializedview.metadata.location=iceberg;
11+
12+
13+
14+
drop table if exists tbl_ice;
15+
drop table if exists tbl_ice_v2;
16+
17+
create external table tbl_ice(a int, b string, c int) stored by iceberg stored as orc tblproperties ('format-version'='1');
18+
create external table tbl_ice_v2(d int, e string, f int) stored by iceberg stored as orc tblproperties ('format-version'='2');
19+
20+
insert into tbl_ice_v2 values (1, 'one v2', 50), (4, 'four v2', 53), (5, 'five v2', 54);
21+
22+
create materialized view mat1
23+
stored by iceberg stored as orc
24+
as
25+
select tbl_ice.b, tbl_ice.c, tbl_ice_v2.e from tbl_ice
26+
join tbl_ice_v2 on tbl_ice.a=tbl_ice_v2.d where tbl_ice.c > 52;
27+
-- group by tbl_ice.b, tbl_ice.c, tbl_ice_v2.e;
28+
29+
-- view should be empty
30+
select * from mat1;
31+
32+
-- view is up-to-date, use it
33+
explain cbo
34+
select tbl_ice.b, tbl_ice.c, tbl_ice_v2.e from tbl_ice join tbl_ice_v2 on tbl_ice.a=tbl_ice_v2.d where tbl_ice.c > 52;
35+
36+
-- insert some new values to one of the source tables
37+
insert into tbl_ice values (1, 'one', 50), (2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54);
38+
39+
-- view is outdated, cannot be used
40+
explain cbo
41+
select tbl_ice.b, tbl_ice.c, tbl_ice_v2.e from tbl_ice join tbl_ice_v2 on tbl_ice.a=tbl_ice_v2.d where tbl_ice.c > 52;
42+
43+
alter materialized view mat1 rebuild;
44+
45+
-- view should contain data
46+
select * from mat1;
47+
48+
-- view is up-to-date again, use it
49+
explain cbo
50+
select tbl_ice.b, tbl_ice.c, tbl_ice_v2.e from tbl_ice join tbl_ice_v2 on tbl_ice.a=tbl_ice_v2.d where tbl_ice.c > 52
51+
group by tbl_ice.b, tbl_ice.c, tbl_ice_v2.e;
52+
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
-- MV metadata is stored in Iceberg
2+
-- SORT_QUERY_RESULTS
3+
--! qt:replace:/(\s+'uuid'=')\S+('\s*)/$1#Masked#$2/
4+
--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
5+
--! qt:replace:/(.*snapshotId=)\S+(\}.*)/$1#SnapshotId#$2/
6+
7+
set hive.explain.user=false;
8+
set hive.support.concurrency=true;
9+
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
10+
set hive.iceberg.materializedview.metadata.location=iceberg;
11+
12+
13+
14+
drop table if exists tbl_ice;
15+
drop table if exists tbl_ice_v2;
16+
17+
create external table tbl_ice(a int, b string, c int) stored by iceberg stored as orc tblproperties ('format-version'='1');
18+
create external table tbl_ice_v2(d int, e string, f int) stored by iceberg stored as orc tblproperties ('format-version'='2');
19+
20+
insert into tbl_ice_v2 values (1, 'one v2', 50), (4, 'four v2', 53), (5, 'five v2', 54);
21+
insert into tbl_ice values (1, 'one', 50), (2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54);
22+
23+
create materialized view mat1
24+
stored by iceberg stored as orc
25+
tblproperties ('rewriting.time.window' = '1min')
26+
as
27+
select tbl_ice.b, tbl_ice.c, tbl_ice_v2.e from tbl_ice
28+
join tbl_ice_v2 on tbl_ice.a=tbl_ice_v2.d where tbl_ice.c > 52;
29+
30+
!sleep 61;
31+
32+
-- view should contain data: both source tables were populated before the view was created
33+
select * from mat1;
34+
35+
-- view is up-to-date, use it
36+
explain cbo
37+
select tbl_ice.b, tbl_ice.c, tbl_ice_v2.e from tbl_ice join tbl_ice_v2 on tbl_ice.a=tbl_ice_v2.d where tbl_ice.c > 52;
38+
39+
40+
41+
-- view is outdated, cannot be used
42+
explain cbo
43+
select tbl_ice.b, tbl_ice.c, tbl_ice_v2.e from tbl_ice join tbl_ice_v2 on tbl_ice.a=tbl_ice_v2.d where tbl_ice.c > 52;
44+
45+
alter materialized view mat1 rebuild;
46+
47+
-- view should contain data
48+
select * from mat1;
49+
50+
-- view is up-to-date again, use it
51+
explain cbo
52+
select tbl_ice.b, tbl_ice.c, tbl_ice_v2.e from tbl_ice join tbl_ice_v2 on tbl_ice.a=tbl_ice_v2.d where tbl_ice.c > 52
53+
group by tbl_ice.b, tbl_ice.c, tbl_ice_v2.e;
54+
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
PREHOOK: query: drop table if exists tbl_ice
2+
PREHOOK: type: DROPTABLE
3+
PREHOOK: Output: database:default
4+
POSTHOOK: query: drop table if exists tbl_ice
5+
POSTHOOK: type: DROPTABLE
6+
POSTHOOK: Output: database:default
7+
PREHOOK: query: drop table if exists tbl_ice_v2
8+
PREHOOK: type: DROPTABLE
9+
PREHOOK: Output: database:default
10+
POSTHOOK: query: drop table if exists tbl_ice_v2
11+
POSTHOOK: type: DROPTABLE
12+
POSTHOOK: Output: database:default
13+
PREHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg stored as orc tblproperties ('format-version'='1')
14+
PREHOOK: type: CREATETABLE
15+
PREHOOK: Output: database:default
16+
PREHOOK: Output: default@tbl_ice
17+
POSTHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg stored as orc tblproperties ('format-version'='1')
18+
POSTHOOK: type: CREATETABLE
19+
POSTHOOK: Output: database:default
20+
POSTHOOK: Output: default@tbl_ice
21+
PREHOOK: query: create external table tbl_ice_v2(d int, e string, f int) stored by iceberg stored as orc tblproperties ('format-version'='2')
22+
PREHOOK: type: CREATETABLE
23+
PREHOOK: Output: database:default
24+
PREHOOK: Output: default@tbl_ice_v2
25+
POSTHOOK: query: create external table tbl_ice_v2(d int, e string, f int) stored by iceberg stored as orc tblproperties ('format-version'='2')
26+
POSTHOOK: type: CREATETABLE
27+
POSTHOOK: Output: database:default
28+
POSTHOOK: Output: default@tbl_ice_v2
29+
PREHOOK: query: insert into tbl_ice_v2 values (1, 'one v2', 50), (4, 'four v2', 53), (5, 'five v2', 54)
30+
PREHOOK: type: QUERY
31+
PREHOOK: Input: _dummy_database@_dummy_table
32+
PREHOOK: Output: default@tbl_ice_v2
33+
POSTHOOK: query: insert into tbl_ice_v2 values (1, 'one v2', 50), (4, 'four v2', 53), (5, 'five v2', 54)
34+
POSTHOOK: type: QUERY
35+
POSTHOOK: Input: _dummy_database@_dummy_table
36+
POSTHOOK: Output: default@tbl_ice_v2
37+
PREHOOK: query: create materialized view mat1
38+
stored by iceberg stored as orc
39+
as
40+
select tbl_ice.b, tbl_ice.c, tbl_ice_v2.e from tbl_ice
41+
join tbl_ice_v2 on tbl_ice.a=tbl_ice_v2.d where tbl_ice.c > 52
42+
PREHOOK: type: CREATE_MATERIALIZED_VIEW
43+
PREHOOK: Input: default@tbl_ice
44+
PREHOOK: Input: default@tbl_ice_v2
45+
PREHOOK: Output: database:default
46+
PREHOOK: Output: default@mat1
47+
#### A masked pattern was here ####
48+
POSTHOOK: query: create materialized view mat1
49+
stored by iceberg stored as orc
50+
as
51+
select tbl_ice.b, tbl_ice.c, tbl_ice_v2.e from tbl_ice
52+
join tbl_ice_v2 on tbl_ice.a=tbl_ice_v2.d where tbl_ice.c > 52
53+
POSTHOOK: type: CREATE_MATERIALIZED_VIEW
54+
POSTHOOK: Input: default@tbl_ice
55+
POSTHOOK: Input: default@tbl_ice_v2
56+
POSTHOOK: Output: database:default
57+
POSTHOOK: Output: default@mat1
58+
#### A masked pattern was here ####
59+
POSTHOOK: Lineage: mat1.b SIMPLE [(tbl_ice)tbl_ice.FieldSchema(name:b, type:string, comment:null), ]
60+
POSTHOOK: Lineage: mat1.c SIMPLE [(tbl_ice)tbl_ice.FieldSchema(name:c, type:int, comment:null), ]
61+
POSTHOOK: Lineage: mat1.e SIMPLE [(tbl_ice_v2)tbl_ice_v2.FieldSchema(name:e, type:string, comment:null), ]
62+
PREHOOK: query: select * from mat1
63+
PREHOOK: type: QUERY
64+
PREHOOK: Input: default@mat1
65+
#### A masked pattern was here ####
66+
POSTHOOK: query: select * from mat1
67+
POSTHOOK: type: QUERY
68+
POSTHOOK: Input: default@mat1
69+
#### A masked pattern was here ####
70+
PREHOOK: query: explain cbo
71+
select tbl_ice.b, tbl_ice.c, tbl_ice_v2.e from tbl_ice join tbl_ice_v2 on tbl_ice.a=tbl_ice_v2.d where tbl_ice.c > 52
72+
PREHOOK: type: QUERY
73+
PREHOOK: Input: default@mat1
74+
PREHOOK: Input: default@tbl_ice
75+
PREHOOK: Input: default@tbl_ice_v2
76+
#### A masked pattern was here ####
77+
POSTHOOK: query: explain cbo
78+
select tbl_ice.b, tbl_ice.c, tbl_ice_v2.e from tbl_ice join tbl_ice_v2 on tbl_ice.a=tbl_ice_v2.d where tbl_ice.c > 52
79+
POSTHOOK: type: QUERY
80+
POSTHOOK: Input: default@mat1
81+
POSTHOOK: Input: default@tbl_ice
82+
POSTHOOK: Input: default@tbl_ice_v2
83+
#### A masked pattern was here ####
84+
CBO PLAN:
85+
HiveProject(tbl_ice.b=[$0], tbl_ice.c=[$1], tbl_ice_v2.e=[$2])
86+
HiveTableScan(table=[[default, mat1]], table:alias=[default.mat1])
87+
88+
PREHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54)
89+
PREHOOK: type: QUERY
90+
PREHOOK: Input: _dummy_database@_dummy_table
91+
PREHOOK: Output: default@tbl_ice
92+
POSTHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54)
93+
POSTHOOK: type: QUERY
94+
POSTHOOK: Input: _dummy_database@_dummy_table
95+
POSTHOOK: Output: default@tbl_ice
96+
PREHOOK: query: explain cbo
97+
select tbl_ice.b, tbl_ice.c, tbl_ice_v2.e from tbl_ice join tbl_ice_v2 on tbl_ice.a=tbl_ice_v2.d where tbl_ice.c > 52
98+
PREHOOK: type: QUERY
99+
PREHOOK: Input: default@tbl_ice
100+
PREHOOK: Input: default@tbl_ice_v2
101+
#### A masked pattern was here ####
102+
POSTHOOK: query: explain cbo
103+
select tbl_ice.b, tbl_ice.c, tbl_ice_v2.e from tbl_ice join tbl_ice_v2 on tbl_ice.a=tbl_ice_v2.d where tbl_ice.c > 52
104+
POSTHOOK: type: QUERY
105+
POSTHOOK: Input: default@tbl_ice
106+
POSTHOOK: Input: default@tbl_ice_v2
107+
#### A masked pattern was here ####
108+
CBO PLAN:
109+
HiveProject(tbl_ice.b=[$1], tbl_ice.c=[$2], tbl_ice_v2.e=[$4])
110+
HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available])
111+
HiveProject(a=[$0], b=[$1], c=[$2])
112+
HiveFilter(condition=[AND(>($2, 52), IS NOT NULL($0))])
113+
HiveTableScan(table=[[default, tbl_ice]], table:alias=[tbl_ice])
114+
HiveProject(d=[$0], e=[$1])
115+
HiveFilter(condition=[IS NOT NULL($0)])
116+
HiveTableScan(table=[[default, tbl_ice_v2]], table:alias=[tbl_ice_v2])
117+
118+
PREHOOK: query: alter materialized view mat1 rebuild
119+
PREHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
120+
PREHOOK: Input: default@tbl_ice
121+
PREHOOK: Input: default@tbl_ice_v2
122+
PREHOOK: Output: default@mat1
123+
POSTHOOK: query: alter materialized view mat1 rebuild
124+
POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
125+
POSTHOOK: Input: default@tbl_ice
126+
POSTHOOK: Input: default@tbl_ice_v2
127+
POSTHOOK: Output: default@mat1
128+
PREHOOK: query: select * from mat1
129+
PREHOOK: type: QUERY
130+
PREHOOK: Input: default@mat1
131+
#### A masked pattern was here ####
132+
POSTHOOK: query: select * from mat1
133+
POSTHOOK: type: QUERY
134+
POSTHOOK: Input: default@mat1
135+
#### A masked pattern was here ####
136+
five 54 five v2
137+
four 53 four v2
138+
PREHOOK: query: explain cbo
139+
select tbl_ice.b, tbl_ice.c, tbl_ice_v2.e from tbl_ice join tbl_ice_v2 on tbl_ice.a=tbl_ice_v2.d where tbl_ice.c > 52
140+
group by tbl_ice.b, tbl_ice.c, tbl_ice_v2.e
141+
PREHOOK: type: QUERY
142+
PREHOOK: Input: default@mat1
143+
PREHOOK: Input: default@tbl_ice
144+
PREHOOK: Input: default@tbl_ice_v2
145+
#### A masked pattern was here ####
146+
POSTHOOK: query: explain cbo
147+
select tbl_ice.b, tbl_ice.c, tbl_ice_v2.e from tbl_ice join tbl_ice_v2 on tbl_ice.a=tbl_ice_v2.d where tbl_ice.c > 52
148+
group by tbl_ice.b, tbl_ice.c, tbl_ice_v2.e
149+
POSTHOOK: type: QUERY
150+
POSTHOOK: Input: default@mat1
151+
POSTHOOK: Input: default@tbl_ice
152+
POSTHOOK: Input: default@tbl_ice_v2
153+
#### A masked pattern was here ####
154+
CBO PLAN:
155+
HiveProject(tbl_ice.b=[$0], tbl_ice.c=[$1], tbl_ice_v2.e=[$2])
156+
HiveAggregate(group=[{0, 1, 2}])
157+
HiveTableScan(table=[[default, mat1]], table:alias=[default.mat1])
158+

0 commit comments

Comments
 (0)