Details
-
Bug
-
Status: Open
-
Major
-
Resolution: Unresolved
-
3.0.0, 3.1.3
-
None
-
None
Description
reproduction
create database tpch100m; use tpch100m; CREATE EXTERNAL TABLE region ( r_regionkey int, r_name string, r_comment string ) STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler' TBLPROPERTIES ( "hive.sql.database.type" = "MYSQL", "hive.sql.jdbc.url" = "jdbc:mysql://127.0.0.1:3306/tpch100m?serverTimezone=UTC&useSSL=false", "hive.sql.jdbc.driver" = "com.mysql.jdbc.Driver", "hive.sql.dbcp.username" = "root", "hive.sql.dbcp.password" = "123456", "hive.sql.table" = "region", "hive.sql.dbcp.maxActive" = "1" ); CREATE EXTERNAL TABLE nation ( n_nationkey int, n_name string, n_regionkey int, n_comment string ) STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler' TBLPROPERTIES ( "hive.sql.database.type" = "MYSQL", "hive.sql.jdbc.url" = "jdbc:mysql://127.0.0.1:3306/tpch100m?serverTimezone=UTC&useSSL=false", "hive.sql.jdbc.driver" = "com.mysql.jdbc.Driver", "hive.sql.dbcp.username" = "root", "hive.sql.dbcp.password" = "123456", "hive.sql.table" = "nation", "hive.sql.dbcp.maxActive" = "1" ); select * from nation,region where n_regionkey = r_regionkey;
log
2022-11-09T16:00:35,181 INFO [04ebb58a-de45-4f8c-9742-06cac555223b main] translator.ASTBuilder: The HiveJdbcConverter generated sql message is: SELECT * FROM (SELECT * FROM `nation` WHERE `n_regionkey` IS NOT NULL) AS `t` INNER JOIN (SELECT * FROM `region` WHERE `r_regionkey` IS NOT NULL) AS `t0` ON `t`.`n_regionkey` = `t0`.`r_regionkey` 2022-11-09T16:00:35,181 INFO [04ebb58a-de45-4f8c-9742-06cac555223b main] parse.CalcitePlanner: Get metadata for source tables 2022-11-09T16:00:35,181 INFO [04ebb58a-de45-4f8c-9742-06cac555223b main] metastore.HiveMetaStore: 0: get_table : tbl=hive.tpch100m.nation 2022-11-09T16:00:35,181 INFO [04ebb58a-de45-4f8c-9742-06cac555223b main] HiveMetaStore.audit: ugi=hbk ip=unknown-ip-addr cmd=get_table : tbl=hive.tpch100m.nation 2022-11-09T16:00:35,194 INFO [04ebb58a-de45-4f8c-9742-06cac555223b main] parse.CalcitePlanner: Get metadata for subqueries 2022-11-09T16:00:35,194 INFO [04ebb58a-de45-4f8c-9742-06cac555223b main] parse.CalcitePlanner: Get metadata for destination tables 2022-11-09T16:00:35,202 INFO [04ebb58a-de45-4f8c-9742-06cac555223b main] ql.Context: New scratch dir is hdfs://0.0.0.0:9000/tmp/hive/hbk/04ebb58a-de45-4f8c-9742-06cac555223b/hive_2022-11-09_16-00-34_987_5570790922374716113-1 2022-11-09T16:00:35,224 ERROR [04ebb58a-de45-4f8c-9742-06cac555223b main] jdbc.JdbcSerDe: Caught exception while initializing the SqlSerDe org.apache.hadoop.hive.serde2.SerDeException: Expected 7 columns. Table definition has 4 columns at org.apache.hive.storage.jdbc.JdbcSerDe.initialize(JdbcSerDe.java:79) ~[hive-jdbc-handler-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.serde2.AbstractSerDe.initialize(AbstractSerDe.java:54) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.serde2.SerDeUtils.initializeSerDe(SerDeUtils.java:540) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:90) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:77) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:289) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:271) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genTablePlan(SemanticAnalyzer.java:10973) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11329) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11288) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.parse.CalcitePlanner.genOPTree(CalcitePlanner.java:517) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12164) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:330) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:285) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:659) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1826) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1773) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1768) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:126) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:214) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:239) ~[hive-cli-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:188) ~[hive-cli-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:402) ~[hive-cli-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:821) ~[hive-cli-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:759) ~[hive-cli-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:683) ~[hive-cli-3.1.3.jar:3.1.3] at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:1.8.0_342] at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[?:1.8.0_342] at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:1.8.0_342] at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_342] at org.apache.hadoop.util.RunJar.run(RunJar.java:244) ~[hadoop-common-2.9.2.jar:?] at org.apache.hadoop.util.RunJar.main(RunJar.java:158) ~[hadoop-common-2.9.2.jar:?] 2022-11-09T16:00:35,224 ERROR [04ebb58a-de45-4f8c-9742-06cac555223b main] hive.log: error in initSerDe: org.apache.hadoop.hive.serde2.SerDeException org.apache.hadoop.hive.serde2.SerDeException: Expected 7 columns. Table definition has 4 columns org.apache.hadoop.hive.serde2.SerDeException: org.apache.hadoop.hive.serde2.SerDeException: Expected 7 columns. Table definition has 4 columns at org.apache.hive.storage.jdbc.JdbcSerDe.initialize(JdbcSerDe.java:104) ~[hive-jdbc-handler-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.serde2.AbstractSerDe.initialize(AbstractSerDe.java:54) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.serde2.SerDeUtils.initializeSerDe(SerDeUtils.java:540) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:90) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:77) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:289) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:271) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genTablePlan(SemanticAnalyzer.java:10973) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11329) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11288) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.parse.CalcitePlanner.genOPTree(CalcitePlanner.java:517) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12164) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:330) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:285) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:659) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1826) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1773) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1768) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:126) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:214) ~[hive-exec-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:239) ~[hive-cli-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:188) ~[hive-cli-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:402) ~[hive-cli-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:821) ~[hive-cli-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:759) ~[hive-cli-3.1.3.jar:3.1.3] at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:683) ~[hive-cli-3.1.3.jar:3.1.3] at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:1.8.0_342] at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[?:1.8.0_342] at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:1.8.0_342] at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_342] at org.apache.hadoop.util.RunJar.run(RunJar.java:244) ~[hadoop-common-2.9.2.jar:?] at org.apache.hadoop.util.RunJar.main(RunJar.java:158) ~[hadoop-common-2.9.2.jar:?] Caused by: org.apache.hadoop.hive.serde2.SerDeException: Expected 7 columns. Table definition has 4 columns at org.apache.hive.storage.jdbc.JdbcSerDe.initialize(JdbcSerDe.java:79) ~[hive-jdbc-handler-3.1.3.jar:3.1.3] ... 31 more 2022-11-09T16:00:35,224 ERROR [04ebb58a-de45-4f8c-9742-06cac555223b main] parse.CalcitePlanner: CBO failed, skipping CBO.
reason
1. The JDBCJoinPushDownRule transforms HiveJoin(HiveJdbcConverter(nation),HiveJdbcConverter(region)) to HiveJdbcConverte(JdbcJoin(nation,region)).
2. HiveJdbcConverter.getTableScan() returns the first JdbcHiveTableScan(nation), but does not return JdbcJoin(nation join region). It is the reason of this bug.
3. When initializing the JdbcSerde, the columns of (nation) and (nation join region) mismatch, so CBO failed.