• 20220210打卡


    数仓搭建之DWD层

    DWD启动表数据解析

    创建启动表

    建表语句

    hive (gmall)> drop table if exists dwd_start_log;
    hive (gmall)> CREATE EXTERNAL TABLE dwd_start_log(
    `mid_id` string,
    `user_id` string, 
    `version_code` string, 
    `version_name` string, 
    `lang` string, 
    `source` string, 
    `os` string, 
    `area` string, 
    `model` string,
    `brand` string, 
    `sdk_version` string, 
    `gmail` string, 
    `height_width` string,  
    `app_time` string,
    `network` string, 
    `lng` string, 
    `lat` string, 
    `entry` string, 
    `open_ad_type` string, 
    `action` string, 
    `loading_time` string, 
    `detail` string, 
    `extend1` string
    )
    PARTITIONED BY (dt string)
    location '/warehouse/gmall/dwd/dwd_start_log/';
    

    向启动表导入数据

    hive (gmall)> 
    insert overwrite table dwd_start_log
    PARTITION (dt='2021-09-16')
    select 
        get_json_object(line,'$.mid') mid_id,
        get_json_object(line,'$.uid') user_id,
        get_json_object(line,'$.vc') version_code,
        get_json_object(line,'$.vn') version_name,
        get_json_object(line,'$.l') lang,
        get_json_object(line,'$.sr') source,
        get_json_object(line,'$.os') os,
        get_json_object(line,'$.ar') area,
        get_json_object(line,'$.md') model,
        get_json_object(line,'$.ba') brand,
        get_json_object(line,'$.sv') sdk_version,
        get_json_object(line,'$.g') gmail,
        get_json_object(line,'$.hw') height_width,
        get_json_object(line,'$.t') app_time,
        get_json_object(line,'$.nw') network,
        get_json_object(line,'$.ln') lng,
        get_json_object(line,'$.la') lat,
        get_json_object(line,'$.entry') entry,
        get_json_object(line,'$.open_ad_type') open_ad_type,
        get_json_object(line,'$.action') action,
        get_json_object(line,'$.loading_time') loading_time,
        get_json_object(line,'$.detail') detail,
        get_json_object(line,'$.extend1') extend1
    from ods_start_log 
    where dt='2021-09-16';
    

    查询是否导入成功

    hive (gmall)> select * from dwd_start_log limit 2;
    

    DWD层启动表加载数据脚本

    DWD 事件表数据解析

    创建基础明细表

    hive (gmall)> 
    drop table if exists dwd_base_event_log;
    CREATE EXTERNAL TABLE dwd_base_event_log(
    `mid_id` string,
    `user_id` string, 
    `version_code` string, 
    `version_name` string, 
    `lang` string, 
    `source` string, 
    `os` string, 
    `area` string, 
    `model` string,
    `brand` string, 
    `sdk_version` string, 
    `gmail` string, 
    `height_width` string, 
    `app_time` string, 
    `network` string, 
    `lng` string, 
    `lat` string, 
    `event_name` string, 
    `event_json` string, 
    `server_time` string)
    PARTITIONED BY (`dt` string)
    stored as parquet
    location '/warehouse/gmall/dwd/dwd_base_event_log/';
    

    自定义UDF函数

    自定义UDTF函数

    将jar包添加到Hive的classpath

    hive (gmall)> add jar /opt/hive-3.1.2/lib/hivefunction-1.0-SNAPSHOT.jar;
    

    添加临时函数

    hive (gmall)> create temporary function base_analizer as 'com.gazikel.udf.BaseFieldUDF';
    
    hive (gmall)> create temporary function flat_analizer as 'com.gazikel.udtf.EventJsonUDTF';
    

    解析事件日志基础明细表

    insert overwrite table dwd_base_event_log 
    PARTITION (dt='2022-02-09')
    select
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    event_name,
    event_json,
    server_time
    from
    (
    select
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[0] as mid_id,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[1] as user_id,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[2] as version_code,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[3] as version_name,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[4] as lang,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[5] as source,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[6] as os,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[7] as area,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[8] as model,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[9] as brand,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[10] as sdk_version,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[11] as gmail,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[12] as height_width,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[13] as app_time,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[14] as network,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[15] as lng,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[16] as lat,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[17] as ops,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[18] as server_time
    from ods_event_log 
    where 
    dt='2022-02-09' 
    and 
    base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la')<>'') 
    sdk_log lateral view flat_analizer(ops) tmp_k as event_name, event_json;
    

    测试是否导入成功:

    hive (gmall)> select * from dwd_base_event_log limit 10;
    

    DWD层数据解析脚本

    DWD层事件表获取

    商品点击表

    1. 建表语句
    hive (gmall)> CREATE EXTERNAL TABLE dwd_display_log(
                > `mid_id` string,
                > `user_id` string,
                > `version_code` string,
                > `version_name` string,
                > `lang` string,
                > `source` string,
                > `os` string,
                > `area` string,
                > `model` string,
                > `brand` string,
                > `sdk_version` string,
                > `gmail` string,
                > `height_width` string,
                > `app_time` string,
                > `network` string,
                > `lng` string,
                > `lat` string,
                > `action` string,
                > `goodsid` string,
                > `place` string,
                > `extend1` string,
                > `category` string,
                > `server_time` string
                > )
                > PARTITIONED BY (dt string)
                > location '/warehouse/gmall/dwd/dwd_display_log/';
    
    1. 导入数据
    insert overwrite table dwd_display_log PARTITION (dt='2022-02-08')
    select 
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'$.kv.action') action,
    get_json_object(event_json,'$.kv.goodsid') goodsid,
    get_json_object(event_json,'$.kv.place') place,
    get_json_object(event_json,'$.kv.extend1') extend1,
    get_json_object(event_json,'$.kv.category') category,
    server_time
    from dwd_base_event_log 
    where dt='2022-02-08' and event_name='display';
    

    商品详情页表

    1. 建表语句
    CREATE EXTERNAL TABLE dwd_newsdetail_log(
    `mid_id` string,
    `user_id` string, 
    `version_code` string, 
    `version_name` string, 
    `lang` string, 
    `source` string, 
    `os` string, 
    `area` string, 
    `model` string,
    `brand` string, 
    `sdk_version` string, 
    `gmail` string, 
    `height_width` string, 
    `app_time` string,  
    `network` string, 
    `lng` string, 
    `lat` string, 
    `entry` string,
    `action` string,
    `goodsid` string,
    `showtype` string,
    `news_staytime` string,
    `loading_time` string,
    `type1` string,
    `category` string,
    `server_time` string)
    PARTITIONED BY (dt string)
    location '/warehouse/gmall/dwd/dwd_newsdetail_log/';
    
    
    1. 导入数据
    insert overwrite table dwd_newsdetail_log
    PARTITION (dt='2022-02-09')
    select 
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'$.kv.entry') entry,
    get_json_object(event_json,'$.kv.action') action,
    get_json_object(event_json,'$.kv.goodsid') goodsid,
    get_json_object(event_json,'$.kv.showtype') showtype,
    get_json_object(event_json,'$.kv.news_staytime') news_staytime,
    get_json_object(event_json,'$.kv.loading_time') loading_time,
    get_json_object(event_json,'$.kv.type1') type1,
    get_json_object(event_json,'$.kv.category') category,
    server_time
    from dwd_base_event_log
    where dt='2022-02-09' and event_name='newsdetail';
    

    商品列表页表

    1. 建表语句
    CREATE EXTERNAL TABLE dwd_loading_log(
    `mid_id` string,
    `user_id` string, 
    `version_code` string, 
    `version_name` string, 
    `lang` string, 
    `source` string, 
    `os` string, 
    `area` string, 
    `model` string,
    `brand` string, 
    `sdk_version` string, 
    `gmail` string,
    `height_width` string,  
    `app_time` string,
    `network` string, 
    `lng` string, 
    `lat` string, 
    `action` string,
    `loading_time` string,
    `loading_way` string,
    `extend1` string,
    `extend2` string,
    `type` string,
    `type1` string,
    `server_time` string)
    PARTITIONED BY (dt string)
    location '/warehouse/gmall/dwd/dwd_loading_log/';
    
    1. 导入数据
    insert overwrite table dwd_loading_log
    PARTITION (dt='2022-02-09')
    select 
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'$.kv.action') action,
    get_json_object(event_json,'$.kv.loading_time') loading_time,
    get_json_object(event_json,'$.kv.loading_way') loading_way,
    get_json_object(event_json,'$.kv.extend1') extend1,
    get_json_object(event_json,'$.kv.extend2') extend2,
    get_json_object(event_json,'$.kv.type') type,
    get_json_object(event_json,'$.kv.type1') type1,
    server_time
    from dwd_base_event_log
    where dt='2022-02-09' and event_name='loading';
    

    广告表

    1. 建表语句
    CREATE EXTERNAL TABLE dwd_ad_log(
    `mid_id` string,
    `user_id` string, 
    `version_code` string, 
    `version_name` string, 
    `lang` string, 
    `source` string, 
    `os` string, 
    `area` string, 
    `model` string,
    `brand` string, 
    `sdk_version` string, 
    `gmail` string, 
    `height_width` string,  
    `app_time` string,
    `network` string, 
    `lng` string, 
    `lat` string, 
    `entry` string,
    `action` string,
    `content` string,
    `detail` string,
    `ad_source` string,
    `behavior` string,
    `newstype` string,
    `show_style` string,
    `server_time` string)
    PARTITIONED BY (dt string)
    location '/warehouse/gmall/dwd/dwd_ad_log/';
    
    
    1. 导入数据
    insert overwrite table dwd_ad_log
    PARTITION (dt='2022-02-09')
    select 
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'$.kv.entry') entry,
    get_json_object(event_json,'$.kv.action') action,
    get_json_object(event_json,'$.kv.content') content,
    get_json_object(event_json,'$.kv.detail') detail,
    get_json_object(event_json,'$.kv.source') ad_source,
    get_json_object(event_json,'$.kv.behavior') behavior,
    get_json_object(event_json,'$.kv.newstype') newstype,
    get_json_object(event_json,'$.kv.show_style') show_style,
    server_time
    from dwd_base_event_log 
    where dt='2022-02-09' and event_name='ad';
    

    消息通知表

    1. 建表语句
    CREATE EXTERNAL TABLE dwd_notification_log(
    `mid_id` string,
    `user_id` string, 
    `version_code` string, 
    `version_name` string, 
    `lang` string,
    `source` string, 
    `os` string, 
    `area` string, 
    `model` string,
    `brand` string, 
    `sdk_version` string, 
    `gmail` string, 
    `height_width` string,  
    `app_time` string,
    `network` string, 
    `lng` string, 
    `lat` string, 
    `action` string,
    `noti_type` string,
    `ap_time` string,
    `content` string,
    `server_time` string
    )
    PARTITIONED BY (dt string)
    location '/warehouse/gmall/dwd/dwd_notification_log/';
    
    1. 导入数据
    insert overwrite table dwd_notification_log
    PARTITION (dt='2022-02-09')
    select 
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'$.kv.action') action,
    get_json_object(event_json,'$.kv.noti_type') noti_type,
    get_json_object(event_json,'$.kv.ap_time') ap_time,
    get_json_object(event_json,'$.kv.content') content,
    server_time
    from dwd_base_event_log
    where dt='2022-02-09' and event_name='notification';
    

    用户前台活跃表

    1. 建表语句
    CREATE EXTERNAL TABLE dwd_active_foreground_log(
    `mid_id` string,
    `user_id` string,
    `version_code` string,
    `version_name` string,
    `lang` string,
    `source` string,
    `os` string,
    `area` string,
    `model` string,
    `brand` string,
    `sdk_version` string,
    `gmail` string,
    `height_width` string,
    `app_time` string,
    `network` string,
    `lng` string,
    `lat` string,
    `push_id` string,
    `access` string,
    `server_time` string)
    PARTITIONED BY (dt string)
    location '/warehouse/gmall/dwd/dwd_foreground_log/';
    
    1. 导入数据
    insert overwrite table dwd_active_foreground_log PARTITION (dt='2022-02-09')
    select 
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'$.kv.push_id') push_id,
    get_json_object(event_json,'$.kv.access') access,
    server_time
    from dwd_base_event_log
    where dt='2022-02-09' and event_name='active_foreground';
    

    用户后台活跃表

    1. 建表语句
    CREATE EXTERNAL TABLE dwd_active_background_log(
    `mid_id` string,
    `user_id` string,
    `version_code` string,
    `version_name` string,
    `lang` string,
    `source` string,
    `os` string,
    `area` string,
    `model` string,
    `brand` string,
    `sdk_version` string,
    `gmail` string,
     `height_width` string,
    `app_time` string,
    `network` string,
    `lng` string,
    `lat` string,
    `active_source` string,
    `server_time` string
    )
    PARTITIONED BY (dt string)
    location '/warehouse/gmall/dwd/dwd_background_log/';
    
    1. 导入数据
    insert overwrite table dwd_active_background_log PARTITION (dt='2022-02-09')
    select 
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'$.kv.active_source') active_source,
    server_time
    from dwd_base_event_log
    where dt='2022-02-09' and event_name='active_background';
    

    评论表

    1. 建表语句
    CREATE EXTERNAL TABLE dwd_comment_log(
    `mid_id` string,
    `user_id` string,
    `version_code` string,
    `version_name` string,
    `lang` string,
    `source` string,
    `os` string,
    `area` string,
    `model` string,
    `brand` string,
    `sdk_version` string,
    `gmail` string,
    `height_width` string,
    `app_time` string,
    `network` string,
    `lng` string,
    `lat` string,
    `comment_id` int,
    `userid` int,
    `p_comment_id` int, 
    `content` string,
    `addtime` string,
    `other_id` int,
    `praise_count` int,
    `reply_count` int,
    `server_time` string
    )
    PARTITIONED BY (dt string)
    location '/warehouse/gmall/dwd/dwd_comment_log/';
    
    1. 导入数据
    insert overwrite table dwd_comment_log PARTITION (dt='2019-02-09')
    select 
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'$.kv.comment_id') comment_id,
    get_json_object(event_json,'$.kv.userid') userid,
    get_json_object(event_json,'$.kv.p_comment_id') p_comment_id,
    get_json_object(event_json,'$.kv.content') content,
    get_json_object(event_json,'$.kv.addtime') addtime,
    get_json_object(event_json,'$.kv.other_id') other_id,
    get_json_object(event_json,'$.kv.praise_count') praise_count,
    get_json_object(event_json,'$.kv.reply_count') reply_count,
    server_time
    from dwd_base_event_log
    where dt='2019-02-09' and event_name='comment';
    

    收藏表

    1. 建表语句
    CREATE EXTERNAL TABLE dwd_favorites_log(
    `mid_id` string,
    `user_id` string, 
    `version_code` string, 
    `version_name` string, 
    `lang` string, 
    `source` string, 
    `os` string, 
    `area` string, 
    `model` string,
    `brand` string, 
    `sdk_version` string, 
    `gmail` string, 
    `height_width` string,  
    `app_time` string,
    `network` string, 
    `lng` string, 
    `lat` string, 
    `id` int, 
    `course_id` int, 
    `userid` int,
    `add_time` string,
    `server_time` string
    )
    PARTITIONED BY (dt string)
    location '/warehouse/gmall/dwd/dwd_favorites_log/';
    
    1. 导入数据
    insert overwrite table dwd_favorites_log
    PARTITION (dt='2022-02-09')
    select 
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'$.kv.id') id,
    get_json_object(event_json,'$.kv.course_id') course_id,
    get_json_object(event_json,'$.kv.userid') userid,
    get_json_object(event_json,'$.kv.add_time') add_time,
    server_time
    from dwd_base_event_log 
    where dt='2022-02-09' and event_name='favorites';
    

    点赞表

    1. 建表语句
    CREATE EXTERNAL TABLE dwd_praise_log(
    `mid_id` string,
    `user_id` string, 
    `version_code` string, 
    `version_name` string, 
    `lang` string, 
    `source` string, 
    `os` string, 
    `area` string, 
    `model` string,
    `brand` string, 
    `sdk_version` string, 
    `gmail` string, 
    `height_width` string,  
    `app_time` string,
    `network` string, 
    `lng` string, 
    `lat` string, 
    `id` string, 
    `userid` string, 
    `target_id` string,
    `type` string,
    `add_time` string,
    `server_time` string
    )
    PARTITIONED BY (dt string)
    location '/warehouse/gmall/dwd/dwd_praise_log/';
    
    1. 导入数据
    insert overwrite table dwd_praise_log PARTITION (dt='2022-02-09')
    select 
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'$.kv.id') id,
    get_json_object(event_json,'$.kv.userid') userid,
    get_json_object(event_json,'$.kv.target_id') target_id,
    get_json_object(event_json,'$.kv.type') type,
    get_json_object(event_json,'$.kv.add_time') add_time,
    server_time
    from dwd_base_event_log
    where dt='2022-02-09' and event_name='praise';
    

    错误日志表

    1. 建表语句
    CREATE EXTERNAL TABLE dwd_error_log(
    `mid_id` string,
    `user_id` string, 
    `version_code` string, 
    `version_name` string, 
    `lang` string, 
    `source` string, 
    `os` string, 
    `area` string, 
    `model` string,
    `brand` string, 
    `sdk_version` string, 
    `gmail` string, 
    `height_width` string,  
    `app_time` string,
    `network` string, 
    `lng` string, 
    `lat` string, 
    `errorBrief` string, 
    `errorDetail` string, 
    `server_time` string)
    PARTITIONED BY (dt string)
    location '/warehouse/gmall/dwd/dwd_error_log/';
    
    1. 导入数据
    insert overwrite table dwd_error_log PARTITION (dt='2022-02-09')
    select 
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'$.kv.errorBrief') errorBrief,
    get_json_object(event_json,'$.kv.errorDetail') errorDetail,
    server_time
    from dwd_base_event_log 
    where dt='2022-02-09' and event_name='error';
    

    DWD层事件表加载数据脚本

    在/opt/shell目录下新建dwd_event_log.sh脚本

    $ vim /opt/shell/dwd_event_log.sh
    

    添加如下内容:

    #!/bin/bash
    
    # 定义变量方便修改
    APP=gmall
    hive=/opt/module/hive/bin/hive
    
    # 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
    if [ -n "$1" ] ;then
    	do_date=$1
    else 
    	do_date=`date -d "-1 day" +%F`  
    fi 
    
    sql="
    set hive.exec.dynamic.partition.mode=nonstrict;
    
    insert overwrite table "$APP".dwd_display_log
    PARTITION (dt='$do_date')
    select 
    	mid_id,
    	user_id,
    	version_code,
    	version_name,
    	lang,
    	source,
    	os,
    	area,
    	model,
    	brand,
    	sdk_version,
    	gmail,
    	height_width,
    	app_time,
    	network,
    	lng,
    	lat,
    	get_json_object(event_json,'$.kv.action') action,
    	get_json_object(event_json,'$.kv.goodsid') goodsid,
    	get_json_object(event_json,'$.kv.place') place,
    	get_json_object(event_json,'$.kv.extend1') extend1,
    	get_json_object(event_json,'$.kv.category') category,
    	server_time
    from "$APP".dwd_base_event_log 
    where dt='$do_date' and event_name='display';
    
    
    insert overwrite table "$APP".dwd_newsdetail_log
    PARTITION (dt='$do_date')
    select 
    	mid_id,
    	user_id,
    	version_code,
    	version_name,
    	lang,
    	source,
    	os,
    	area,
    	model,
    	brand,
    	sdk_version,
    	gmail,
    	height_width,
    	app_time,
    	network,
    	lng,
    	lat,
    	get_json_object(event_json,'$.kv.entry') entry,
    	get_json_object(event_json,'$.kv.action') action,
    	get_json_object(event_json,'$.kv.goodsid') goodsid,
    	get_json_object(event_json,'$.kv.showtype') showtype,
    	get_json_object(event_json,'$.kv.news_staytime') news_staytime,
    	get_json_object(event_json,'$.kv.loading_time') loading_time,
    	get_json_object(event_json,'$.kv.type1') type1,
    	get_json_object(event_json,'$.kv.category') category,
    	server_time
    from "$APP".dwd_base_event_log 
    where dt='$do_date' and event_name='newsdetail';
    
    
    insert overwrite table "$APP".dwd_loading_log
    PARTITION (dt='$do_date')
    select 
    	mid_id,
    	user_id,
    	version_code,
    	version_name,
    	lang,
    	source,
    	os,
    	area,
    	model,
    	brand,
    	sdk_version,
    	gmail,
    	height_width,
    	app_time,
    	network,
    	lng,
    	lat,
    	get_json_object(event_json,'$.kv.action') action,
    	get_json_object(event_json,'$.kv.loading_time') loading_time,
    	get_json_object(event_json,'$.kv.loading_way') loading_way,
    	get_json_object(event_json,'$.kv.extend1') extend1,
    	get_json_object(event_json,'$.kv.extend2') extend2,
    	get_json_object(event_json,'$.kv.type') type,
    	get_json_object(event_json,'$.kv.type1') type1,
    	server_time
    from "$APP".dwd_base_event_log 
    where dt='$do_date' and event_name='loading';
    
    
    insert overwrite table "$APP".dwd_ad_log
    PARTITION (dt='$do_date')
    select 
    	mid_id,
    	user_id,
    	version_code,
    	version_name,
    	lang,
    	source,
    	os,
    	area,
    	model,
    	brand,
    	sdk_version,
    	gmail,
    	height_width,
    	app_time,
    	network,
    	lng,
    	lat,
    	get_json_object(event_json,'$.kv.entry') entry,
    	get_json_object(event_json,'$.kv.action') action,
    	get_json_object(event_json,'$.kv.content') content,
    	get_json_object(event_json,'$.kv.detail') detail,
    	get_json_object(event_json,'$.kv.source') ad_source,
    	get_json_object(event_json,'$.kv.behavior') behavior,
    	get_json_object(event_json,'$.kv.newstype') newstype,
    	get_json_object(event_json,'$.kv.show_style') show_style,
    	server_time
    from "$APP".dwd_base_event_log 
    where dt='$do_date' and event_name='ad';
    
    
    insert overwrite table "$APP".dwd_notification_log
    PARTITION (dt='$do_date')
    select 
    	mid_id,
    	user_id,
    	version_code,
    	version_name,
    	lang,
    	source,
    	os,
    	area,
    	model,
    	brand,
    	sdk_version,
    	gmail,
    	height_width,
    	app_time,
    	network,
    	lng,
    	lat,
    	get_json_object(event_json,'$.kv.action') action,
    	get_json_object(event_json,'$.kv.noti_type') noti_type,
    	get_json_object(event_json,'$.kv.ap_time') ap_time,
    	get_json_object(event_json,'$.kv.content') content,
    	server_time
    from "$APP".dwd_base_event_log 
    where dt='$do_date' and event_name='notification';
    
    
    insert overwrite table "$APP".dwd_active_foreground_log
    PARTITION (dt='$do_date')
    select 
    	mid_id,
    	user_id,
    	version_code,
    	version_name,
    	lang,
    	source,
    	os,
    	area,
    	model,
    	brand,
    	sdk_version,
    	gmail,
    	height_width,
    	app_time,
    	network,
    	lng,
    	lat,
    get_json_object(event_json,'$.kv.push_id') push_id,
    get_json_object(event_json,'$.kv.access') access,
    	server_time
    from "$APP".dwd_base_event_log 
    where dt='$do_date' and event_name='active_foreground';
    
    
    insert overwrite table "$APP".dwd_active_background_log
    PARTITION (dt='$do_date')
    select 
    	mid_id,
    	user_id,
    	version_code,
    	version_name,
    	lang,
    	source,
    	os,
    	area,
    	model,
    	brand,
    	sdk_version,
    	gmail,
    	height_width,
    	app_time,
    	network,
    	lng,
    	lat,
    	get_json_object(event_json,'$.kv.active_source') active_source,
    	server_time
    from "$APP".dwd_base_event_log 
    where dt='$do_date' and event_name='active_background';
    
    
    insert overwrite table "$APP".dwd_comment_log
    PARTITION (dt='$do_date')
    select 
    	mid_id,
    	user_id,
    	version_code,
    	version_name,
    	lang,
    	source,
    	os,
    	area,
    	model,
    	brand,
    	sdk_version,
    	gmail,
    	height_width,
    	app_time,
    	network,
    	lng,
    	lat,
    	get_json_object(event_json,'$.kv.comment_id') comment_id,
    	get_json_object(event_json,'$.kv.userid') userid,
    	get_json_object(event_json,'$.kv.p_comment_id') p_comment_id,
    	get_json_object(event_json,'$.kv.content') content,
    	get_json_object(event_json,'$.kv.addtime') addtime,
    	get_json_object(event_json,'$.kv.other_id') other_id,
    	get_json_object(event_json,'$.kv.praise_count') praise_count,
    	get_json_object(event_json,'$.kv.reply_count') reply_count,
    	server_time
    from "$APP".dwd_base_event_log 
    where dt='$do_date' and event_name='comment';
    
    
    insert overwrite table "$APP".dwd_favorites_log
    PARTITION (dt='$do_date')
    select 
    	mid_id,
    	user_id,
    	version_code,
    	version_name,
    	lang,
    	source,
    	os,
    	area,
    	model,
    	brand,
    	sdk_version,
    	gmail,
    	height_width,
    	app_time,
    	network,
    	lng,
    	lat,
    	get_json_object(event_json,'$.kv.id') id,
    	get_json_object(event_json,'$.kv.course_id') course_id,
    	get_json_object(event_json,'$.kv.userid') userid,
    	get_json_object(event_json,'$.kv.add_time') add_time,
    	server_time
    from "$APP".dwd_base_event_log 
    where dt='$do_date' and event_name='favorites';
    
    
    insert overwrite table "$APP".dwd_praise_log
    PARTITION (dt='$do_date')
    select 
    	mid_id,
    	user_id,
    	version_code,
    	version_name,
    	lang,
    	source,
    	os,
    	area,
    	model,
    	brand,
    	sdk_version,
    	gmail,
    	height_width,
    	app_time,
    	network,
    	lng,
    	lat,
    	get_json_object(event_json,'$.kv.id') id,
    	get_json_object(event_json,'$.kv.userid') userid,
    	get_json_object(event_json,'$.kv.target_id') target_id,
    	get_json_object(event_json,'$.kv.type') type,
    	get_json_object(event_json,'$.kv.add_time') add_time,
    	server_time
    from "$APP".dwd_base_event_log 
    where dt='$do_date' and event_name='praise';
    
    
    insert overwrite table "$APP".dwd_error_log
    PARTITION (dt='$do_date')
    select 
    	mid_id,
    	user_id,
    	version_code,
    	version_name,
    	lang,
    	source,
    	os,
    	area,
    	model,
    	brand,
    	sdk_version,
    	gmail,
    	height_width,
    	app_time,
    	network,
    	lng,
    	lat,
    	get_json_object(event_json,'$.kv.errorBrief') errorBrief,
    	get_json_object(event_json,'$.kv.errorDetail') errorDetail,
    	server_time
    from "$APP".dwd_base_event_log 
    where dt='$do_date' and event_name='error';
    "
    
    $hive -e "$sql"
    

    给脚本添加可执行权限:

    $ chmod 777 /opt/shell/dwd_event_log.sh
    
  • 相关阅读:
    BOI 2002 双调路径
    BOI'98 DAY 2 TASK 1 CONFERENCE CALL Dijkstra/Dijkstra+priority_queue/SPFA
    USACO 2013 November Contest, Silver Problem 2. Crowded Cows 单调队列
    BOI 2003 Problem. Spaceship
    USACO 2006 November Contest Problem. Road Blocks SPFA
    CEOI 2004 Trial session Problem. Journey DFS
    USACO 2015 January Contest, Silver Problem 2. Cow Routing Dijkstra
    LG P1233 木棍加工 动态规划,Dilworth
    LG P1020 导弹拦截 Dilworth
    USACO 2007 February Contest, Silver Problem 3. Silver Cow Party SPFA
  • 原文地址:https://www.cnblogs.com/Gazikel/p/15878204.html
Copyright © 2020-2023  润新知