Uploaded image for project: 'IMPALA'
  1. IMPALA
  2. IMPALA-1322

Impalad may crash when JOIN predicates contain a mix of CHAR/VARCHAR columns

    XML | Word | Printable | JSON

Details

    • Type: Bug
    • Status: Resolved
    • Priority: Blocker
    • Resolution: Fixed
    • Affects Version/s: Impala 2.0
    • Fix Version/s: Impala 2.0
    • Component/s: None
    • Labels: None

    Description

      The query generator runs without crashing Impala as long as each JOIN contains only one predicate (I've only tried text columns). As soon as a JOIN has two predicates, impalad starts crashing. I suspect the cause is the mix of column types and sizes (CHAR/VARCHAR of different lengths) across the predicates.

      [localhost.localdomain:21000] > create table t (c10 char(10), c100 char(100), v100 varchar(100), v200 varchar(200));
      Query: create table t (c10 char(10), c100 char(100), v100 varchar(100), v200 varchar(200))
      
      Fetched 0 row(s) in 0.07s
      
      
      [localhost.localdomain:21000] > insert into t values (cast('a' as char(1)), cast('a' as char(1)), cast('a' as varchar(1)), cast('a' as varchar(1)));
      Query: insert into t values (cast('a' as char(1)), cast('a' as char(1)), cast('a' as varchar(1)), cast('a' as varchar(1)))
      Inserted 1 row(s) in 0.97s
      
      
      [localhost.localdomain:21000] > select * from t;
      Query: select * from t
      +------------+------------------------------------------------------------------------------------------------------+------+------+
      | c10        | c100                                                                                                 | v100 | v200 |
      +------------+------------------------------------------------------------------------------------------------------+------+------+
      | a          | a                                                                                                    | a    | a    |
      +------------+------------------------------------------------------------------------------------------------------+------+------+
      Fetched 1 row(s) in 0.30s
      
      
      [localhost.localdomain:21000] > select * from t join t tt on tt.c10 = t.c100 and tt.v100 = t.c10;
      Query: select * from t join t tt on tt.c10 = t.c100 and tt.v100 = t.c10
      Error communicating with impalad: TSocket read 0 bytes
      [Not connected] > 
      

      stack trace

      (gdb) bt
      #0  0x0000003a0ca32635 in raise () from /lib64/libc.so.6
      #1  0x0000003a0ca33e15 in abort () from /lib64/libc.so.6
      #2  0x0000000001e7db09 in google::DumpStackTraceAndExit () at src/utilities.cc:147
      #3  0x0000000001e7520d in google::LogMessage::Fail () at src/logging.cc:1296
      #4  0x0000000001e78c97 in google::LogMessage::SendToLog (this=0x7f376a5ba120) at src/logging.cc:1250
      #5  0x0000000001e781f6 in google::LogMessage::Flush (this=0x7f376a5ba120) at src/logging.cc:1119
      #6  0x0000000001e7912d in google::LogMessageFatal::~LogMessageFatal (this=0x7f376a5ba120, __in_chrg=<value optimized out>)
          at src/logging.cc:1817
      #7  0x0000000001191146 in impala::Expr::ComputeResultsLayout (exprs=std::vector of length 2, capacity 2 = {...}, offsets=0x94aa340, 
          var_result_begin=0x94aa358) at /data/9/query-gen/Impala/be/src/exprs/expr.cc:305
      #8  0x0000000001191376 in impala::Expr::ComputeResultsLayout (ctxs=std::vector of length 2, capacity 2 = {...}, offsets=0x94aa340, 
          var_result_begin=0x94aa358) at /data/9/query-gen/Impala/be/src/exprs/expr.cc:326
      #9  0x0000000001504e62 in impala::HashTableCtx::HashTableCtx (this=0x94aa310, build_expr_ctxs=std::vector of length 2, capacity 2 = {...}, 
          probe_expr_ctxs=std::vector of length 2, capacity 2 = {...}, stores_nulls=false, finds_nulls=false, initial_seed=3, max_levels=4, 
          num_build_tuples=1) at /data/9/query-gen/Impala/be/src/exec/hash-table.cc:61
      #10 0x00000000014b76ec in impala::PartitionedHashJoinNode::Prepare (this=0x9b42e00, state=0x843c300)
          at /data/9/query-gen/Impala/be/src/exec/partitioned-hash-join-node.cc:118
      #11 0x00000000013bcc77 in impala::PlanFragmentExecutor::Prepare (this=0x4fe61d0, request=...)
          at /data/9/query-gen/Impala/be/src/runtime/plan-fragment-executor.cc:234
      #12 0x0000000001069a7c in impala::ImpalaServer::FragmentExecState::Prepare (this=0x4fe6000, exec_params=...)
          at /data/9/query-gen/Impala/be/src/service/fragment-exec-state.cc:43
      #13 0x0000000000f94ee7 in impala::ImpalaServer::StartPlanFragmentExecution (this=0x6dc4c00, exec_params=...)
          at /data/9/query-gen/Impala/be/src/service/impala-server.cc:1138
      #14 0x0000000000f94044 in impala::ImpalaServer::ExecPlanFragment (this=0x6dc4c00, return_val=..., params=...)
          at /data/9/query-gen/Impala/be/src/service/impala-server.cc:1050
      #15 0x0000000001256f27 in impala::ImpalaInternalServiceProcessor::process_ExecPlanFragment (this=0x6cebf20, seqid=0, iprot=0x88e7cc0, 
          oprot=0x88e7d00, callContext=0x6c5ae40) at /data/9/query-gen/Impala/be/generated-sources/gen-cpp/ImpalaInternalService.cpp:949
      #16 0x0000000001256ccc in impala::ImpalaInternalServiceProcessor::dispatchCall (this=0x6cebf20, iprot=0x88e7cc0, oprot=0x88e7d00, fname=
          "ExecPlanFragment", seqid=0, callContext=0x6c5ae40)
          at /data/9/query-gen/Impala/be/generated-sources/gen-cpp/ImpalaInternalService.cpp:922
      #17 0x0000000000f9eda0 in apache::thrift::TDispatchProcessor::process (this=0x6cebf20, in=..., out=..., connectionContext=0x6c5ae40)
          at /data/9/query-gen/Impala/thirdparty/thrift-0.9.0/build/include/thrift/TDispatchProcessor.h:121
      #18 0x0000000001e34569 in apache::thrift::server::TThreadedServer::Task::run (this=0x7b48600) at src/thrift/server/TThreadedServer.cpp:70
      #19 0x0000000000ee159b in impala::ThriftThread::RunRunnable (this=0x6c5a740, runnable=..., promise=0x7f37c9056480)
          at /data/9/query-gen/Impala/be/src/rpc/thrift-thread.cc:61
      #20 0x0000000000ee2d4b in boost::_mfi::mf2<void, impala::ThriftThread, boost::shared_ptr<apache::thrift::concurrency::Runnable>, impala::Promise<unsigned long>*>::operator() (this=0x6cf7a40, p=0x6c5a740, a1=..., a2=0x7f37c9056480)
          at /usr/include/boost/bind/mem_fn_template.hpp:280
      

      code (the failing check is the DCHECK_GE on the last line shown, expr.cc:305 in the stack trace above)

      int Expr::ComputeResultsLayout(const vector<Expr*>& exprs, vector<int>* offsets,
          int* var_result_begin) {
        if (exprs.size() == 0) {
          *var_result_begin = -1;
          return 0;
        }
      
        vector<MemLayoutData> data;
        data.resize(exprs.size());
      
        // Collect all the byte sizes and sort them
        for (int i = 0; i < exprs.size(); ++i) {
          data[i].expr_idx = i;
          if (exprs[i]->type().IsVarLen()) {
            data[i].byte_size = 16;
            data[i].variable_length = true;
          } else {
            data[i].byte_size = exprs[i]->type().GetByteSize();
            data[i].variable_length = false;
          }
          DCHECK_NE(data[i].byte_size, 0);
        }
      
        sort(data.begin(), data.end());
      
        // Walk the types and store in a packed aligned layout
        int max_alignment = sizeof(int64_t);
        int current_alignment = data[0].byte_size;
        int byte_offset = 0;
      
        offsets->resize(exprs.size());
        offsets->clear();
        *var_result_begin = -1;
      
        for (int i = 0; i < data.size(); ++i) {
          DCHECK_GE(data[i].byte_size, current_alignment);
      

      Attachments

        Activity

          People

            victor.bittorf_impala_fcb6 Victor Bittorf
            caseyc casey
            Votes:
            0 Vote for this issue
            Watchers:
            2 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: