• 拉链表-增量更新方法一


    参考文档:http://lxw1234.com/archives/2015/08/473.htm

    一、元表结构

    1、定义业务库原始订单表:

    drop table chavin.orders;

    CREATE TABLE orders (

    orderid INT,

    createtime STRING,

    modifiedtime STRING,

    status STRING

    )row format delimited fields terminated by ' '

    stored AS textfile;

    --加载测试数据

    1 2015-08-18 2015-08-18 创建

    2 2015-08-18 2015-08-18 创建

    3 2015-08-19 2015-08-21 支付

    4 2015-08-19 2015-08-21 完成

    5 2015-08-19 2015-08-20 支付

    6 2015-08-20 2015-08-20 创建

    7 2015-08-20 2015-08-21 支付

    8 2015-08-21 2015-08-21 创建

    2、定义ODS订单表结构,采用日分区存储:

    drop table t_ods_orders_inc;

    CREATE TABLE t_ods_orders_inc (

    orderid INT,

    createtime STRING,

    modifiedtime STRING,

    status STRING

    ) PARTITIONED BY (day STRING)

    row format delimited fields terminated by ' '

    stored AS textfile;

    3、创建dw层历史订单表:

    drop table t_dw_orders_his;

    CREATE TABLE t_dw_orders_his (

    orderid INT,

    createtime STRING,

    modifiedtime STRING,

    status STRING,

    dw_start_date STRING,

    dw_end_date STRING

    )row format delimited fields terminated by ' '

    stored AS textfile;

    二、初始化dw层历史订单表:

    1、将源库订单表历史数据插入到ods订单表中:

    INSERT overwrite TABLE t_ods_orders_inc PARTITION (day = '2015-08-20')

    SELECT orderid,createtime,modifiedtime,status

    FROM chavin.orders

    WHERE cast(createtime as date) <= '2015-08-20';

    2、通过ods层订单表数据初始化dw层历史订单表:

    INSERT overwrite TABLE t_dw_orders_his

    SELECT orderid,createtime,modifiedtime,status,

    createtime AS dw_start_date,

    '9999-12-31' AS dw_end_date

    FROM t_ods_orders_inc

    WHERE day = '2015-08-20';

    三、增量添加数据

    1、将原始订单表增量数据插入到ods层订单表前一天分区中:

    INSERT overwrite TABLE t_ods_orders_inc PARTITION (day = '2015-08-21')

    SELECT orderid,createtime,modifiedtime,status

    FROM orders

    WHERE createtime = '2015-08-21' OR modifiedtime = '2015-08-21';

    2、通过dw历史数据和ods增量数据刷新dw历史数据,此处采用临时表方法:

    DROP TABLE IF EXISTS t_dw_orders_his_tmp;

    CREATE TABLE t_dw_orders_his_tmp AS

    SELECT orderid,

    createtime,

    modifiedtime,

    status,

    dw_start_date,

    dw_end_date

    FROM (

        SELECT a.orderid,

        a.createtime,

        a.modifiedtime,

        a.status,

        a.dw_start_date,

        CASE WHEN b.orderid IS NOT NULL AND a.dw_end_date > '2015-08-21' THEN '2015-08-20' ELSE a.dw_end_date END AS dw_end_date

        FROM t_dw_orders_his a

        left outer join (SELECT * FROM t_ods_orders_inc WHERE day = '2015-08-21') b

        ON (a.orderid = b.orderid)

        UNION ALL

        SELECT orderid,

        createtime,

        modifiedtime,

        status,

        modifiedtime AS dw_start_date,

        '9999-12-31' AS dw_end_date

        FROM t_ods_orders_inc

        WHERE day = '2015-08-21'

    ) x

    ORDER BY orderid,dw_start_date;

    3、根据历史表更新dw层历史订单表:

    INSERT overwrite TABLE t_dw_orders_his

    SELECT * FROM t_dw_orders_his_tmp;

    4、根据上面步骤增加22号数据:

    --加载增量数据到ods层订单表分区'2015-08-22'中:

    1 2015-08-18 2015-08-22 支付

    2 2015-08-18 2015-08-22 完成

    6 2015-08-20 2015-08-22 支付

    9 2015-08-22 2015-08-22 创建

    8 2015-08-22 2015-08-22 支付

    10 2015-08-22 2015-08-22 支付

    alter table t_ods_orders_inc add partition(day='2015-08-22');

    load data local inpath '/opt/datas/orders22.txt' into table chavin.t_ods_orders_inc partition(day='2015-08-22');

    --根据历史订单数据和增量数据更新历史订单表数据,此处采用临时表:

    DROP TABLE IF EXISTS t_dw_orders_his_tmp;

    CREATE TABLE t_dw_orders_his_tmp AS

    SELECT orderid,

    createtime,

    modifiedtime,

    status,

    dw_start_date,

    dw_end_date

    FROM (

        SELECT a.orderid,

        a.createtime,

        a.modifiedtime,

        a.status,

        a.dw_start_date,

        CASE WHEN b.orderid IS NOT NULL AND a.dw_end_date > '2015-08-22' THEN '2015-08-21' ELSE a.dw_end_date END AS dw_end_date

        FROM t_dw_orders_his a

        left outer join (SELECT * FROM t_ods_orders_inc WHERE day = '2015-08-22') b

        ON (a.orderid = b.orderid)

        UNION ALL

        SELECT orderid,

        createtime,

        modifiedtime,

        status,

        modifiedtime AS dw_start_date,

        '9999-12-31' AS dw_end_date

        FROM t_ods_orders_inc

        WHERE day = '2015-08-22'

    ) x

    ORDER BY orderid,dw_start_date;

    --根据临时表更新历史订单表:

    INSERT overwrite TABLE t_dw_orders_his

    SELECT * FROM t_dw_orders_his_tmp;

    5、查看2015-08-21、2015-08-21历史快照:

    select * from t_dw_orders_his where dw_start_date <= '2015-08-21' and dw_end_date >= '2015-08-21';

    select * from t_dw_orders_his where dw_start_date <= '2015-08-22' and dw_end_date >= '2015-08-22';

  • 相关阅读:
    linux笔记
    初探hook的键盘获取
    python pyHook安装
    转: ZigBee/Z-Stack CC2530实现低功耗运行的配置简介
    三种zigbee网络架构详解
    adc verilog spi 时序
    dac verilog ad5601
    verilog 三段式状态机的技巧
    fpga 状态机 检测1011序列
    lattice diamond fpga 状态机的理解
  • 原文地址:https://www.cnblogs.com/wcwen1990/p/7601203.html
Copyright © 2020-2023  润新知