• tpch-kudu


    1.在impala里建立好文本表:

    create external table customer (C_CUSTKEY INT, C_NAME STRING, C_ADDRESS STRING, C_NATIONKEY INT, C_PHONE STRING, C_ACCTBAL DOUBLE, C_MKTSEGMENT STRING, C_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/user/training/tpch_10g/customer';
    create external table lineitem (L_ORDERKEY INT, L_PARTKEY INT, L_SUPPKEY INT, L_LINENUMBER INT, L_QUANTITY DOUBLE, L_EXTENDEDPRICE DOUBLE, L_DISCOUNT DOUBLE, L_TAX DOUBLE, L_RETURNFLAG STRING, L_LINESTATUS STRING, L_SHIPDATE STRING, L_COMMITDATE STRING, L_RECEIPTDATE STRING, L_SHIPINSTRUCT STRING, L_SHIPMODE STRING, L_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/user/training/tpch_10g/lineitem';
    create external table nation (N_NATIONKEY INT, N_NAME STRING, N_REGIONKEY INT, N_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/user/training/tpch_10g/nation';
    create external table orders (O_ORDERKEY INT, O_CUSTKEY INT, O_ORDERSTATUS STRING, O_TOTALPRICE DOUBLE, O_ORDERDATE STRING, O_ORDERPRIORITY STRING, O_CLERK STRING, O_SHIPPRIORITY INT, O_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/user/training/tpch_10g/orders';
    create external table part (P_PARTKEY INT, P_NAME STRING, P_MFGR STRING, P_BRAND STRING, P_TYPE STRING, P_SIZE INT, P_CONTAINER STRING, P_RETAILPRICE DOUBLE, P_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/user/training/tpch_10g/part';
    create external table partsupp (PS_PARTKEY INT, PS_SUPPKEY INT, PS_AVAILQTY INT, PS_SUPPLYCOST DOUBLE, PS_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION'/user/training/tpch_10g/partsupp';
    create external table region (R_REGIONKEY INT, R_NAME STRING, R_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/user/training/tpch_100g/region';
    create external table supplier (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, S_NATIONKEY INT, S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/user/training/tpch_10g/supplier';

    3.建立kudu-impala表

    create table customer_kudu (C_CUSTKEY INT, C_NAME STRING, C_ADDRESS STRING, C_NATIONKEY INT, C_PHONE STRING, C_ACCTBAL DOUBLE, C_MKTSEGMENT STRING, C_COMMENT STRING)
    TBLPROPERTIES(
      'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
      'kudu.table_name' = 'customer_kudu',
      'kudu.master_addresses' = 'node1:7051',
      'kudu.key_columns' = 'C_CUSTKEY'
    );
    insert into customer_kudu select * from customer;
    
    create table nation_kudu (N_NATIONKEY INT, N_NAME STRING, N_REGIONKEY INT, N_COMMENT STRING)
    TBLPROPERTIES(
      'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
      'kudu.table_name' = 'nation_kudu',
      'kudu.master_addresses' = 'node1:7051',
      'kudu.key_columns' = 'N_NATIONKEY'
    );
    insert into nation_kudu select * from nation;
    
    
    create table part_kudu (P_PARTKEY INT, P_NAME STRING, P_MFGR STRING, P_BRAND STRING, P_TYPE STRING, P_SIZE INT, P_CONTAINER STRING, P_RETAILPRICE DOUBLE, P_COMMENT STRING)
    TBLPROPERTIES(
      'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
      'kudu.table_name' = 'part_kudu',
      'kudu.master_addresses' = 'node1:7051',
      'kudu.key_columns' = 'P_PARTKEY'
    );
    insert into part_kudu select * from part;
    
    
    create table supplier_kudu (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, S_NATIONKEY INT, S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING)
    TBLPROPERTIES(
      'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
      'kudu.table_name' = 'supplier_kudu',
      'kudu.master_addresses' = 'node1:7051',
      'kudu.key_columns' = 'S_SUPPKEY'
    );
    insert into supplier_kudu select * from supplier;
    
    
    
    create table partsupp_kudu (PS_PARTKEY INT, PS_SUPPKEY INT, PS_AVAILQTY INT, PS_SUPPLYCOST DOUBLE, PS_COMMENT STRING) 
    TBLPROPERTIES(
      'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
      'kudu.table_name' = 'partsupp_kudu',
      'kudu.master_addresses' = 'node1:7051',
      'kudu.key_columns' = 'PS_PARTKEY,PS_SUPPKEY'
    );
    insert into partsupp_kudu select * from partsupp;
    
    create table region_kudu (R_REGIONKEY INT, R_NAME STRING, R_COMMENT STRING) 
    TBLPROPERTIES(
      'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
      'kudu.table_name' = 'region_kudu',
      'kudu.master_addresses' = 'node1:7051',
      'kudu.key_columns' = 'R_REGIONKEY'
    );
    insert into region_kudu select * from region;
    
    
    create table orders_kudu (O_ORDERKEY INT, O_CUSTKEY INT, O_ORDERSTATUS STRING, O_TOTALPRICE DOUBLE, O_ORDERDATE STRING, O_ORDERPRIORITY STRING, O_CLERK STRING, O_SHIPPRIORITY INT, O_COMMENT STRING)
    TBLPROPERTIES(
      'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
      'kudu.table_name' = 'orders_kudu',
      'kudu.master_addresses' = 'node1:7051',
      'kudu.key_columns' = 'O_ORDERKEY'
    );
    insert into orders_kudu select * from orders;
    
    
    create table lineitem_kudu (L_ORDERKEY INT, L_LINENUMBER INT, L_PARTKEY INT, L_SUPPKEY INT, L_QUANTITY DOUBLE, L_EXTENDEDPRICE DOUBLE, L_DISCOUNT DOUBLE, L_TAX DOUBLE, L_RETURNFLAG STRING, L_LINESTATUS STRING, L_SHIPDATE STRING, L_COMMITDATE STRING, L_RECEIPTDATE STRING, L_SHIPINSTRUCT STRING, L_SHIPMODE STRING, L_COMMENT STRING)
    TBLPROPERTIES(
      'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
      'kudu.table_name' = 'lineitem_kudu',
      'kudu.master_addresses' = 'node1:7051',
      'kudu.key_columns' = 'L_ORDERKEY,L_LINENUMBER'
    );
    insert into lineitem_kudu select L_ORDERKEY, L_LINENUMBER, L_PARTKEY, L_SUPPKEY, L_QUANTITY, L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, L_RETURNFLAG, L_LINESTATUS, L_SHIPDATE, L_COMMITDATE, L_RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT from lineitem;

    下面是日志信息:

    [node2:21000] > use tpch_10g;
    Query: use tpch_10g
    [node2:21000] > show tables;
    Query: show tables
    +----------+
    | name     |
    +----------+
    | customer |
    | lineitem |
    | nation   |
    | orders   |
    | part     |
    | partsupp |
    | region   |
    | supplier |
    +----------+
    Fetched 8 row(s) in 0.01s
    [node2:21000] > create table customer_kudu (C_CUSTKEY INT, C_NAME STRING, C_ADDRESS STRING, C_NATIONKEY INT, C_PHONE STRING, C_ACCTBAL DOUBLE, C_MKTSEGMENT STRING, C_COMMENT STRING)
                  > TBLPROPERTIES(
                  >   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
                  >   'kudu.table_name' = 'customer_kudu',
                  >   'kudu.master_addresses' = 'node1:7051',
                  >   'kudu.key_columns' = 'C_CUSTKEY'
                  > );
    Query: create table customer_kudu (C_CUSTKEY INT, C_NAME STRING, C_ADDRESS STRING, C_NATIONKEY INT, C_PHONE STRING, C_ACCTBAL DOUBLE, C_MKTSEGMENT STRING, C_COMMENT STRING)
    TBLPROPERTIES(
    'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
    'kudu.table_name' = 'customer_kudu',
    'kudu.master_addresses' = 'node1:7051',
    'kudu.key_columns' = 'C_CUSTKEY'
    )
    
    Fetched 0 row(s) in 0.68s
    [node2:21000] > insert into customer_kudu select * from customer;
    Query: insert into customer_kudu select * from customer
    Inserted 1500000 row(s) in 19.02s
    [node2:21000] > show tables;
    Query: show tables
    +---------------+
    | name          |
    +---------------+
    | customer      |
    | customer_kudu |
    | lineitem      |
    | nation        |
    | orders        |
    | part          |
    | partsupp      |
    | region        |
    | supplier      |
    +---------------+
    Fetched 9 row(s) in 0.01s
    [node2:21000] > create table nation_kudu (N_NATIONKEY INT, N_NAME STRING, N_REGIONKEY INT, N_COMMENT STRING)
                  > TBLPROPERTIES(
                  >   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
                  >   'kudu.table_name' = 'nation_kudu',
                  >   'kudu.master_addresses' = 'node1:7051',
                  >   'kudu.key_columns' = 'N_NATIONKEY'
                  > );
    Query: create table nation_kudu (N_NATIONKEY INT, N_NAME STRING, N_REGIONKEY INT, N_COMMENT STRING)
    TBLPROPERTIES(
    'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
    'kudu.table_name' = 'nation_kudu',
    'kudu.master_addresses' = 'node1:7051',
    'kudu.key_columns' = 'N_NATIONKEY'
    )
    
    Fetched 0 row(s) in 0.72s
    [node2:21000] > insert into nation_kudu select * from nation;
    Query: insert into nation_kudu select * from nation
    Inserted 25 row(s) in 4.26s
    [node2:21000] > create table part_kudu (P_PARTKEY INT, P_NAME STRING, P_MFGR STRING, P_BRAND STRING, P_TYPE STRING, P_SIZE INT, P_CONTAINER STRING, P_RETAILPRICE DOUBLE, P_COMMENT STRING)
                  > TBLPROPERTIES(
                  >   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
                  >   'kudu.table_name' = 'part_kudu',
                  >   'kudu.master_addresses' = 'node1:7051',
                  >   'kudu.key_columns' = 'P_PARTKEY'
                  > );
    Query: create table part_kudu (P_PARTKEY INT, P_NAME STRING, P_MFGR STRING, P_BRAND STRING, P_TYPE STRING, P_SIZE INT, P_CONTAINER STRING, P_RETAILPRICE DOUBLE, P_COMMENT STRING)
    TBLPROPERTIES(
    'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
    'kudu.table_name' = 'part_kudu',
    'kudu.master_addresses' = 'node1:7051',
    'kudu.key_columns' = 'P_PARTKEY'
    )
    
    Fetched 0 row(s) in 0.68s
    [node2:21000] > insert into part_kudu select * from part;
    Query: insert into part_kudu select * from part
    Inserted 2000000 row(s) in 22.71s
    [node2:21000] > create table supplier_kudu (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, S_NATIONKEY INT, S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING)
                  > TBLPROPERTIES(
                  >   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
                  >   'kudu.table_name' = 'supplier_kudu',
                  >   'kudu.master_addresses' = 'node1:7051',
                  >   'kudu.key_columns' = 'S_SUPPKEY'
                  > );
    Query: create table supplier_kudu (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, S_NATIONKEY INT, S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING)
    TBLPROPERTIES(
    'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
    'kudu.table_name' = 'supplier_kudu',
    'kudu.master_addresses' = 'node1:7051',
    'kudu.key_columns' = 'S_SUPPKEY'
    )
    
    Fetched 0 row(s) in 0.73s
    [node2:21000] > insert into supplier_kudu select * from supplier_kudu;
    Query: insert into supplier_kudu select * from supplier_kudu
    Inserted 0 row(s) in 4.67s
    [node2:21000] > create table partsupp_kudu (PS_PARTKEY INT, PS_SUPPKEY INT, PS_AVAILQTY INT, PS_SUPPLYCOST DOUBLE, PS_COMMENT STRING) 
                  > TBLPROPERTIES(
                  >   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
                  >   'kudu.table_name' = 'partsupp_kudu',
                  >   'kudu.master_addresses' = 'node1:7051',
                  >   'kudu.key_columns' = 'PS_PARTKEY,PS_SUPPKEY'
                  > );
    Query: create table partsupp_kudu (PS_PARTKEY INT, PS_SUPPKEY INT, PS_AVAILQTY INT, PS_SUPPLYCOST DOUBLE, PS_COMMENT STRING)
    TBLPROPERTIES(
    'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
    'kudu.table_name' = 'partsupp_kudu',
    'kudu.master_addresses' = 'node1:7051',
    'kudu.key_columns' = 'PS_PARTKEY,PS_SUPPKEY'
    )
    
    Fetched 0 row(s) in 0.72s
    [node2:21000] > insert into partsupp_kudu select * from partsupp_kudu;
    Query: insert into partsupp_kudu select * from partsupp_kudu
    Inserted 0 row(s) in 3.07s
    [node2:21000] > insert into supplier_kudu select * from supplier;
    Query: insert into supplier_kudu select * from supplier
    Inserted 100000 row(s) in 1.78s
    [node2:21000] > insert into partsupp_kudu select * from partsupp;
    Query: insert into partsupp_kudu select * from partsupp
    Inserted 8000000 row(s) in 64.44s
    [node2:21000] > create table region_kudu (R_REGIONKEY INT, R_NAME STRING, R_COMMENT STRING) 
                  > TBLPROPERTIES(
                  >   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
                  >   'kudu.table_name' = 'region_kudu',
                  >   'kudu.master_addresses' = 'node1:7051',
                  >   'kudu.key_columns' = 'R_REGIONKEY'
                  > );
    Query: create table region_kudu (R_REGIONKEY INT, R_NAME STRING, R_COMMENT STRING)
    TBLPROPERTIES(
    'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
    'kudu.table_name' = 'region_kudu',
    'kudu.master_addresses' = 'node1:7051',
    'kudu.key_columns' = 'R_REGIONKEY'
    )
    
    Fetched 0 row(s) in 0.72s
    [node2:21000] > insert into region_kudu select * from region;
    Query: insert into region_kudu select * from region
    Inserted 5 row(s) in 4.05s
    
    
    [node2:21000] > create table lineitem_kudu (L_ORDERKEY INT, L_LINENUMBER INT, L_PARTKEY INT, L_SUPPKEY INT, L_QUANTITY DOUBLE, L_EXTENDEDPRICE DOUBLE, L_DISCOUNT DOUBLE, L_TAX DOUBLE, L_RETURNFLAG STRING, L_LINESTATUS STRING, L_SHIPDATE STRING, L_COMMITDATE STRING, L_RECEIPTDATE STRING, L_SHIPINSTRUCT STRING, L_SHIPMODE STRING, L_COMMENT STRING)
                  > TBLPROPERTIES(
                  >   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
                  >   'kudu.table_name' = 'lineitem_kudu',
                  >   'kudu.master_addresses' = 'node1:7051',
                  >   'kudu.key_columns' = 'L_ORDERKEY,L_LINENUMBER'
                  > );
    Query: create table lineitem_kudu (L_ORDERKEY INT, L_LINENUMBER INT, L_PARTKEY INT, L_SUPPKEY INT, L_QUANTITY DOUBLE, L_EXTENDEDPRICE DOUBLE, L_DISCOUNT DOUBLE, L_TAX DOUBLE, L_RETURNFLAG STRING, L_LINESTATUS STRING, L_SHIPDATE STRING, L_COMMITDATE STRING, L_RECEIPTDATE STRING, L_SHIPINSTRUCT STRING, L_SHIPMODE STRING, L_COMMENT STRING)
    TBLPROPERTIES(
    'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
    'kudu.table_name' = 'lineitem_kudu',
    'kudu.master_addresses' = 'node1:7051',
    'kudu.key_columns' = 'L_ORDERKEY,L_LINENUMBER'
    )
    
    Fetched 0 row(s) in 0.72s
    [node2:21000] > insert into lineitem_kudu select L_ORDERKEY, L_LINENUMBER, L_PARTKEY, L_SUPPKEY, L_QUANTITY, L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, L_RETURNFLAG, L_LINESTATUS, L_SHIPDATE, L_COMMITDATE, L_RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT from lineitem;
    Query: insert into lineitem_kudu select L_ORDERKEY, L_LINENUMBER, L_PARTKEY, L_SUPPKEY, L_QUANTITY, L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, L_RETURNFLAG, L_LINESTATUS, L_SHIPDATE, L_COMMITDATE, L_RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT from lineitem
    Inserted 59986052 row(s) in 951.42s
    
    [node2:21000] > create table orders_kudu (O_ORDERKEY INT, O_CUSTKEY INT, O_ORDERSTATUS STRING, O_TOTALPRICE DOUBLE, O_ORDERDATE STRING, O_ORDERPRIORITY STRING, O_CLERK STRING, O_SHIPPRIORITY INT, O_COMMENT STRING)
                  > TBLPROPERTIES(
                  >   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
                  >   'kudu.table_name' = 'orders_kudu',
                  >   'kudu.master_addresses' = 'node1:7051',
                  >   'kudu.key_columns' = 'O_ORDERKEY'
                  > );
    Query: create table orders_kudu (O_ORDERKEY INT, O_CUSTKEY INT, O_ORDERSTATUS STRING, O_TOTALPRICE DOUBLE, O_ORDERDATE STRING, O_ORDERPRIORITY STRING, O_CLERK STRING, O_SHIPPRIORITY INT, O_COMMENT STRING)
    TBLPROPERTIES(
    'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
    'kudu.table_name' = 'orders_kudu',
    'kudu.master_addresses' = 'node1:7051',
    'kudu.key_columns' = 'O_ORDERKEY'
    )
    
    Fetched 0 row(s) in 0.86s
    [node2:21000] > insert into orders_kudu select * from orders;
    Query: insert into orders_kudu select * from orders
    Inserted 15000000 row(s) in 133.11s
    [node2:21000] > 

    3.计算表的统计信息:

    compute stats customer;
    compute stats lineitem;
    compute stats nation;
    compute stats orders;
    compute stats part;
    compute stats partsupp;
    compute stats region;
    compute stats supplier;

     

     compute stats customer_kudu;
     ompute stats lineitem_kudu;
     compute stats nation_kudu;
     compute stats orders_kudu;
     compute stats part_kudu;
     compute stats partsupp_kudu;
     compute stats region_kudu;
     compute stats supplier_kudu;

  • 相关阅读:
    MySql 获取当前节点及递归所有上级节点
    MySql创建树结构递归查询存储过程
    F2工作流引擎Web层全新扁平化UI上线
    F2工作流引擎参与者类型成员的交、并、互拆计算规则
    F2工作流引擎之组织用户模型(四)
    F2工作流引擎之 工作流运转模型(三)
    F2工作流引擎之 概述(一)
    离线安装docker,并导入docker镜像
    sudo: /usr/bin/sudo must be owned by uid 0 and have the setuid bit set 的解决办法
    yml 文件中使用环境变量
  • 原文地址:https://www.cnblogs.com/littlesuccess/p/4867888.html
Copyright © 2020-2023  润新知