• Hudi通过Hive查询hudi表数据


    环境准备

    集成jar包:hudi-hadoop-mr-bundle-0.10.1.jar,放入$HIVE_HOME/lib目录下

    建外部表

    create database db_hudi;
    
    use db_hudi;
    
    CREATE EXTERNAL TABLE IF NOT EXISTS tbl_hudi_didi(
        order_id BIGINT,
        product_id INT,
        city_id INT,
        district INT,
        county INT,
        type INT,
        combo_type INT,
        traffic_type INT,
        passenger_count INT,
        driver_product_id INT,
        start_dest_distance INT,
        arrive_time STRING,
        departure_time STRING,
        pre_total_fee DOUBLE,
        normal_time STRING,
        bubble_trace_id STRING,
        product_1level INT,
        dest_lng DOUBLE,
        dest_lat DOUBLE,
        starting_lng DOUBLE,
        starting_lat DOUBLE,
        ts BIGINT,
        partitionpath STRING
    )
    PARTITIONED BY(
        date_str string
    )
    ROW FORMAT SERDE
        'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
    STORED AS INPUTFORMAT
        'org.apache.hudi.hadoop.HoodieParquetInputFormat'
    OUTPUTFORMAT
        'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
    LOCATION
        '/hudi-warehouse/tbl_didi_haikou';

    手动加入分区

    --手动添加分区
    ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-22') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-22';
    ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-23') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-23';
    ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-24') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-24';
    ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-25') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-25';
    ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-26') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-26';
    ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-27') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-27';
    ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-28') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-28';
    ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-29') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-29';
    ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-30') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-30';
    ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-31') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-31';
    ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-1') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-1';
    ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-2') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-2';
    ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-3') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-3';
    ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-4') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-4';
    ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-5') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-5';
    ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-6') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-6';
    ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-7') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-7';
    ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-8') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-8';
    ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-9') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-9';
    ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-10') LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-10';

    查看分区

    SHOW PARTITIONS db_hudi.tbl_hudi_didi;
     

    指标统计

    -- 开发测试,设置运行模式为本地模式
    set hive.exec.mode.local.auto = true;
    
    set hive.exec.mode.local.auto.tasks.max = 10;
    set hive.exec.mode.local.auto.inputbytes.max=88801103;
    set hive.exec.mode.local.auto.input.files.max=50;
    SET hive.mapred.mode=nonstrict;
    -- 指标一:订单类型统计
    WITH tmp as (
        SELECT
            product_id,
            COUNT(1) AS total
        FROM db_hudi.tbl_hudi_didi
        GROUP BY product_id
    )
    SELECT
        CASE product_id
            WHEN 1 THEN "滴滴专车"
            WHEN 2 THEN "滴滴企业专车"
            WHEN 3 THEN "滴滴快车"
            WHEN 4 THEN "滴滴企业快车"
            ELSE "未知"
        END AS order_type,
        total
    FROM tmp
    ;
    
    -- 指标二:订单时效性统计
    WITH tmp as (
        SELECT
            type,
            COUNT(1) AS total
        FROM db_hudi.tbl_hudi_didi
        GROUP BY type
    )
    SELECT
        CASE type
            WHEN 0 THEN "实时"
            WHEN 1 THEN "预约"
            ELSE "未知"
        END AS order_type,
        total
    FROM tmp
    ;
    
    --指标三:订单交通类型统计
    SELECT
        traffic_type,
        COUNT(1) AS total
    FROM db_hudi.tbl_hudi_didi
    GROUP BY traffic_type;
    
    -- 指标五:订单价格统计,先将价格划分区间,再统计,此处使用WHEN函数和SUM函数
    SELECT
        SUM(
            CASE WHEN pre_total_fee BETWEEN 0 AND 15 THEN 1 ELSE 0 END
        ) AS 0_15,
        SUM(
            CASE WHEN pre_total_fee BETWEEN 16 AND 30 THEN 1 ELSE 0 END
        ) AS 16_30,
        SUM(
            CASE WHEN pre_total_fee BETWEEN 31 AND 50 THEN 1 ELSE 0 END
        ) AS 31_50,
        SUM(
            CASE WHEN pre_total_fee BETWEEN 51 AND 100 THEN 1 ELSE 0 END
        ) AS 51_100,
        SUM(
            CASE WHEN pre_total_fee > 100 THEN 1 ELSE 0 END
        ) AS 100_
    FROM db_hudi.tbl_hudi_didi;
     
  • 相关阅读:
    c++笔记
    python笔记
    《c++不在难学--随老鸟快速通关》
    hexo
    hexo搭建
    《趣学算法》,陈小玉
    从机器学习谈起
    成不了AI高手?因为你根本不懂数据!听听这位老教授多年心血练就的最实用统计学
    PyTorch还是TensorFlow?这有一份新手指南
    win10下机器学习TensorFlow搭建
  • 原文地址:https://www.cnblogs.com/EnzoDin/p/15962045.html
Copyright © 2020-2023  润新知