Uploaded image for project: 'Apache Hudi'
  1. Apache Hudi
  2. HUDI-3005

flink write hudi throw Not an Avro data file exception

    XMLWordPrintableJSON

Details

    • Bug
    • Status: Open
    • Major
    • Resolution: Unresolved
    • 0.9.0
    • 0.9.0
    • archiving, flink, table-service
    • None

    Description

      An error is reported when writing to Hudi with the Flink writer.

      code:

      CREATE TABLE datagen_test (
          id BIGINT,
          name        VARCHAR(20),
          age        int,
          birthday   TIMESTAMP(3),
          ts TIMESTAMP(3)
      ) WITH (
        'connector' = 'datagen',
        'rows-per-second'= '20',
        'fields.id.min' = '1',
        'fields.id.max' = '10000'
      ); 
      
      CREATE TABLE datagen_hudi_test2(
      
      id bigint ,
      
      name string,
      
      birthday TIMESTAMP(3),
      
      ts TIMESTAMP(3),
      
      `partition_str` VARCHAR(20),
      
      primary key(id) not enforced -- a primary key (uuid) must be specified
      
      )
      
      PARTITIONED BY (`partition_str`)
      
      with(
      
      'connector'='hudi',
      
      'path'= 'hdfs:///user/hive/warehouse/hudi.db/datagen_hudi_test2'
      
      , 'hoodie.datasource.write.recordkey.field'= 'id'-- record key (primary key)
      
      , 'write.precombine.field'= 'ts'-- field used for automatic precombine
      
      , 'write.tasks'= '1'
      
      , 'compaction.tasks'= '1'
      
      , 'write.rate.limit'= '2000'-- write rate limit
      
      , 'table.type'= 'MERGE_ON_READ'-- default COPY_ON_WRITE, optionally MERGE_ON_READ 
      
      , 'compaction.async.enabled'= 'true'-- whether to enable asynchronous compaction
      
      , 'compaction.trigger.strategy'= 'num_commits'-- trigger compaction by number of commits
      
      , 'compaction.delta_commits'= '5',   -- default is 5
        'hive_sync.enable' = 'true', 
        'hive_sync.mode' = 'hms'    ,
        'hive_sync.metastore.uris' = '***',
          'hive_sync.table'='datagen_hudi_test2_hivesync',                         
        'hive_sync.db'='hudi'  ,
        'index.global.enabled' = 'true'
      
      );
      
       insert into datagen_hudi_test2
      select id,name,birthday,ts as ts,DATE_FORMAT(birthday, 'yyyyMMdd') as `partition_str`
       from datagen_test;
       

      error:

      2021-12-14 10:18:53,554 INFO  org.apache.hudi.common.table.log.HoodieLogFormatWriter       [] - HoodieLogFile{pathStr='hdfs:/user/hive/warehouse/hudi.db/datagen_hudi_test2/.hoodie/.commits_.archive.1_1-0-1', fileLen=0} exists. Appending to existing file
      2021-12-14 10:18:53,880 ERROR org.apache.hudi.table.HoodieTimelineArchiveLog               [] - Failed to archive commits, .commit file: 20211210103323.rollback
      org.apache.hudi.org.apache.avro.InvalidAvroMagicException: Not an Avro data file
         at org.apache.hudi.org.apache.avro.file.DataFileReader.openReader(DataFileReader.java:56) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeAvroMetadata(TimelineMetadataUtils.java:178) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.client.utils.MetadataConversionUtils.createMetaWrapper(MetadataConversionUtils.java:103) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.table.HoodieTimelineArchiveLog.convertToAvroRecord(HoodieTimelineArchiveLog.java:341) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.table.HoodieTimelineArchiveLog.archive(HoodieTimelineArchiveLog.java:305) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.table.HoodieTimelineArchiveLog.archiveIfRequired(HoodieTimelineArchiveLog.java:128) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.client.HoodieFlinkWriteClient.postCommit(HoodieFlinkWriteClient.java:323) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.client.AbstractHoodieWriteClient.commitStats(AbstractHoodieWriteClient.java:191) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.client.HoodieFlinkWriteClient.commit(HoodieFlinkWriteClient.java:120) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.sink.StreamWriteOperatorCoordinator.doCommit(StreamWriteOperatorCoordinator.java:462) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.sink.StreamWriteOperatorCoordinator.commitInstant(StreamWriteOperatorCoordinator.java:438) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.sink.StreamWriteOperatorCoordinator.lambda$notifyCheckpointComplete$1(StreamWriteOperatorCoordinator.java:207) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.sink.utils.NonThrownExecutor.lambda$execute$0(NonThrownExecutor.java:67) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_221]
         at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_221]
         at java.lang.Thread.run(Thread.java:748) [?:1.8.0_221]
      2021-12-14 10:18:53,895 ERROR org.apache.hudi.sink.StreamWriteOperatorCoordinator          [] - Executor executes action [commits the instant 20211214101738] error
      org.apache.hudi.exception.HoodieCommitException: Failed to archive commits
         at org.apache.hudi.table.HoodieTimelineArchiveLog.archive(HoodieTimelineArchiveLog.java:318) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.table.HoodieTimelineArchiveLog.archiveIfRequired(HoodieTimelineArchiveLog.java:128) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.client.HoodieFlinkWriteClient.postCommit(HoodieFlinkWriteClient.java:323) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.client.AbstractHoodieWriteClient.commitStats(AbstractHoodieWriteClient.java:191) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.client.HoodieFlinkWriteClient.commit(HoodieFlinkWriteClient.java:120) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.sink.StreamWriteOperatorCoordinator.doCommit(StreamWriteOperatorCoordinator.java:462) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.sink.StreamWriteOperatorCoordinator.commitInstant(StreamWriteOperatorCoordinator.java:438) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.sink.StreamWriteOperatorCoordinator.lambda$notifyCheckpointComplete$1(StreamWriteOperatorCoordinator.java:207) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.sink.utils.NonThrownExecutor.lambda$execute$0(NonThrownExecutor.java:67) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_221]
         at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_221]
         at java.lang.Thread.run(Thread.java:748) [?:1.8.0_221]
      Caused by: org.apache.hudi.org.apache.avro.InvalidAvroMagicException: Not an Avro data file
         at org.apache.hudi.org.apache.avro.file.DataFileReader.openReader(DataFileReader.java:56) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeAvroMetadata(TimelineMetadataUtils.java:178) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.client.utils.MetadataConversionUtils.createMetaWrapper(MetadataConversionUtils.java:103) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.table.HoodieTimelineArchiveLog.convertToAvroRecord(HoodieTimelineArchiveLog.java:341) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.table.HoodieTimelineArchiveLog.archive(HoodieTimelineArchiveLog.java:305) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         ... 11 more
      2021-12-14 10:18:53,896 INFO  org.apache.hudi.sink.StreamWriteOperatorCoordinator          [] - Executor executes action [taking checkpoint 2] success!
      2021-12-14 10:18:53,912 INFO  org.apache.flink.runtime.jobmaster.JobMaster                 [] - Trying to recover from a global failure.
      org.apache.flink.util.FlinkException: Global failure triggered by OperatorCoordinator for 'hoodie_stream_write' (operator 37faf88697f0a69c783562897fa7eaeb).
         at org.apache.flink.runtime.operators.coordination.OperatorCoordinatorHolder$LazyInitializedCoordinatorContext.failJob(OperatorCoordinatorHolder.java:492) ~[flink-dist_2.11-1.12.2.jar:1.12.2]
         at org.apache.hudi.sink.utils.CoordinatorExecutor.exceptionHook(CoordinatorExecutor.java:44) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.sink.utils.NonThrownExecutor.lambda$execute$0(NonThrownExecutor.java:76) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_221]
         at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_221]
         at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_221]
      Caused by: org.apache.hudi.exception.HoodieException: Executor executes action [commits the instant 20211214101738] error
         ... 5 more
      Caused by: org.apache.hudi.exception.HoodieCommitException: Failed to archive commits
         at org.apache.hudi.table.HoodieTimelineArchiveLog.archive(HoodieTimelineArchiveLog.java:318) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.table.HoodieTimelineArchiveLog.archiveIfRequired(HoodieTimelineArchiveLog.java:128) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.client.HoodieFlinkWriteClient.postCommit(HoodieFlinkWriteClient.java:323) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.client.AbstractHoodieWriteClient.commitStats(AbstractHoodieWriteClient.java:191) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.client.HoodieFlinkWriteClient.commit(HoodieFlinkWriteClient.java:120) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.sink.StreamWriteOperatorCoordinator.doCommit(StreamWriteOperatorCoordinator.java:462) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.sink.StreamWriteOperatorCoordinator.commitInstant(StreamWriteOperatorCoordinator.java:438) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.sink.StreamWriteOperatorCoordinator.lambda$notifyCheckpointComplete$1(StreamWriteOperatorCoordinator.java:207) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.sink.utils.NonThrownExecutor.lambda$execute$0(NonThrownExecutor.java:67) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         ... 3 more
      Caused by: org.apache.hudi.org.apache.avro.InvalidAvroMagicException: Not an Avro data file
         at org.apache.hudi.org.apache.avro.file.DataFileReader.openReader(DataFileReader.java:56) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeAvroMetadata(TimelineMetadataUtils.java:178) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.client.utils.MetadataConversionUtils.createMetaWrapper(MetadataConversionUtils.java:103) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.table.HoodieTimelineArchiveLog.convertToAvroRecord(HoodieTimelineArchiveLog.java:341) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.table.HoodieTimelineArchiveLog.archive(HoodieTimelineArchiveLog.java:305) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.table.HoodieTimelineArchiveLog.archiveIfRequired(HoodieTimelineArchiveLog.java:128) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.client.HoodieFlinkWriteClient.postCommit(HoodieFlinkWriteClient.java:323) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.client.AbstractHoodieWriteClient.commitStats(AbstractHoodieWriteClient.java:191) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.client.HoodieFlinkWriteClient.commit(HoodieFlinkWriteClient.java:120) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.sink.StreamWriteOperatorCoordinator.doCommit(StreamWriteOperatorCoordinator.java:462) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.sink.StreamWriteOperatorCoordinator.commitInstant(StreamWriteOperatorCoordinator.java:438) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.sink.StreamWriteOperatorCoordinator.lambda$notifyCheckpointComplete$1(StreamWriteOperatorCoordinator.java:207) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         at org.apache.hudi.sink.utils.NonThrownExecutor.lambda$execute$0(NonThrownExecutor.java:67) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
         ... 3 more

      Attachments

        1. screenshot-1.png
          34 kB
          waywtdcc

        Activity

          People

            Unassigned Unassigned
            waywtdcc waywtdcc
            Votes:
            0 Vote for this issue
            Watchers:
            1 Start watching this issue

            Dates

              Created:
              Updated: