Details
-
Bug
-
Status: In Progress
-
Minor
-
Resolution: Unresolved
-
None
-
hudi:release-0.11.0
spark: 3.2.1
Description
-- Source table for the merge reproduction.
-- Hudi table with type = 'mor', keyed on id, pre-combined on ts,
-- and partitioned by dt.
CREATE TABLE hudi_test_wm_mor_01 (
    id    INT,
    name  STRING,
    price DOUBLE,
    ts    BIGINT,
    dt    STRING
)
USING hudi
TBLPROPERTIES (
    type = 'mor',
    primaryKey = 'id',
    preCombineField = 'ts'
)
PARTITIONED BY (dt);
-- Target table for the merge reproduction.
-- Same schema and table properties as the source table:
-- type = 'mor', keyed on id, pre-combined on ts, partitioned by dt.
CREATE TABLE hudi_test_wm_mor_02 (
    id    INT,
    name  STRING,
    price DOUBLE,
    ts    BIGINT,
    dt    STRING
)
USING hudi
TBLPROPERTIES (
    type = 'mor',
    primaryKey = 'id',
    preCombineField = 'ts'
)
PARTITIONED BY (dt);
-- Insert some data.
-- Source table: five rows (ids 12-16), each in its own dt partition
-- (2021-12-11 through 2021-12-15). The statements are kept separate so that
-- each row is written by its own INSERT, exactly as in the original report.
INSERT INTO hudi_test_wm_mor_01 (id, name, price, ts, dt)
VALUES (12, 'a12', 23.234, 1648871782, '2021-12-11');
INSERT INTO hudi_test_wm_mor_01 (id, name, price, ts, dt)
VALUES (13, 'a13', 24.234, 1648871783, '2021-12-12');
INSERT INTO hudi_test_wm_mor_01 (id, name, price, ts, dt)
VALUES (14, 'a14', 25.234, 1648871784, '2021-12-13');
INSERT INTO hudi_test_wm_mor_01 (id, name, price, ts, dt)
VALUES (15, 'a15', 26.234, 1648871785, '2021-12-14');
INSERT INTO hudi_test_wm_mor_01 (id, name, price, ts, dt)
VALUES (16, 'a16', 27.234, 1648871786, '2021-12-15');

-- Target table: only ids 12 and 13, in partitions 2021-12-11 and 2021-12-12.
-- These are the only rows the merge's ON condition (id and dt) can match.
INSERT INTO hudi_test_wm_mor_02 (id, name, price, ts, dt)
VALUES (12, 'target12', 88.1, 1648871782, '2021-12-11');
INSERT INTO hudi_test_wm_mor_02 (id, name, price, ts, dt)
VALUES (13, 'target13', 89.1, 1648871783, '2021-12-12');
-- Merge operation.
-- Updates every target row whose (id, dt) pair matches a source row.
-- There is no WHEN NOT MATCHED clause, so the statement requests no inserts
-- for source rows without a match in the target.
MERGE INTO hudi_test_wm_mor_02 AS h0
USING (
    SELECT
        id,
        name,
        price,
        ts,
        dt
    FROM hudi_test_wm_mor_01
) AS s0
    ON h0.id = s0.id
    AND h0.dt = s0.dt
WHEN MATCHED THEN
    UPDATE SET *;
Description:
After the merge SQL executes, the target table contains five partitions (2021-12-11, 2021-12-12, 2021-12-13, 2021-12-14, 2021-12-15).
In fact, only two partitions contain matching rows, so only those two partitions (2021-12-11, 2021-12-12) are expected in the target table.
The remaining three partitions (2021-12-13, 2021-12-14, 2021-12-15) should not be created.
In extreme cases, a very large number of empty partitions is created in the target table.