Details
-
Bug
-
Status: Open
-
Major
-
Resolution: Unresolved
-
None
Description
Affected versions: hudi-0.14.0-rc1, hudi-0.14.0-rc2
-- Reproduction script (Spark SQL + Hudi).
-- Creates a merge-on-read table keyed on `id`, using a BUCKET index with 512
-- buckets hashed on `id`, partitioned by (`day`, `hour`). Hive-style
-- partitioning and meta/hive sync are switched off via OPTIONS.
CREATE TABLE `hudi_test`.`tmp_hudi_test_1` (
    `id`   STRING,
    `name` STRING,
    `dt`   BIGINT,
    `day`  STRING COMMENT '日期分区',
    `hour` INT COMMENT '小时分区'
)
USING hudi
OPTIONS (
    'hoodie.datasource.write.hive_style_partitioning' 'false',
    'hoodie.datasource.meta.sync.enable' 'false',
    'hoodie.datasource.hive_sync.enable' 'false'
)
TBLPROPERTIES (
    'primaryKey' = 'id',
    'type' = 'mor',
    'preCombineField' = 'dt',
    'hoodie.index.type' = 'BUCKET',
    'hoodie.bucket.index.hash.field' = 'id',
    'hoodie.bucket.index.num.buckets' = 512
)
PARTITIONED BY (`day`, `hour`);

-- Write the same record into three different hour partitions (10, 11, 12)
-- of day '2023-10-12', one INSERT per partition.
INSERT INTO `hudi_test`.`tmp_hudi_test_1`
SELECT
    '1' AS id,
    'aa' AS name,
    123 AS dt,
    '2023-10-12' AS `day`,
    10 AS `hour`;

INSERT INTO `hudi_test`.`tmp_hudi_test_1`
SELECT
    '1' AS id,
    'aa' AS name,
    123 AS dt,
    '2023-10-12' AS `day`,
    11 AS `hour`;

INSERT INTO `hudi_test`.`tmp_hudi_test_1`
SELECT
    '1' AS id,
    'aa' AS name,
    123 AS dt,
    '2023-10-12' AS `day`,
    12 AS `hour`;

-- Read back with equality filters on both partition columns.
-- `SELECT *` is kept deliberately so the repro matches the reported query.
SELECT *
FROM `hudi_test`.`tmp_hudi_test_1`
WHERE day = '2023-10-12'
  AND hour = 11;
Expected behavior: the stage for this query should run with exactly 1 task.
If the table contains a large number of files, this can cause the driver to run out of memory (OOM) or to spend a long time in full GC.