• 20220210打卡


    数仓搭建之DWD层

    DWD启动表数据解析

    创建启动表

    建表语句

    hive (gmall)> drop table if exists dwd_start_log;
    hive (gmall)> CREATE EXTERNAL TABLE dwd_start_log(
    `mid_id` string,
    `user_id` string, 
    `version_code` string, 
    `version_name` string, 
    `lang` string, 
    `source` string, 
    `os` string, 
    `area` string, 
    `model` string,
    `brand` string, 
    `sdk_version` string, 
    `gmail` string, 
    `height_width` string,  
    `app_time` string,
    `network` string, 
    `lng` string, 
    `lat` string, 
    `entry` string, 
    `open_ad_type` string, 
    `action` string, 
    `loading_time` string, 
    `detail` string, 
    `extend1` string
    )
    PARTITIONED BY (dt string)
    location '/warehouse/gmall/dwd/dwd_start_log/';
    

    向启动表导入数据

    hive (gmall)> 
    insert overwrite table dwd_start_log
    PARTITION (dt='2021-09-16')
    select 
        get_json_object(line,'$.mid') mid_id,
        get_json_object(line,'$.uid') user_id,
        get_json_object(line,'$.vc') version_code,
        get_json_object(line,'$.vn') version_name,
        get_json_object(line,'$.l') lang,
        get_json_object(line,'$.sr') source,
        get_json_object(line,'$.os') os,
        get_json_object(line,'$.ar') area,
        get_json_object(line,'$.md') model,
        get_json_object(line,'$.ba') brand,
        get_json_object(line,'$.sv') sdk_version,
        get_json_object(line,'$.g') gmail,
        get_json_object(line,'$.hw') height_width,
        get_json_object(line,'$.t') app_time,
        get_json_object(line,'$.nw') network,
        get_json_object(line,'$.ln') lng,
        get_json_object(line,'$.la') lat,
        get_json_object(line,'$.entry') entry,
        get_json_object(line,'$.open_ad_type') open_ad_type,
        get_json_object(line,'$.action') action,
        get_json_object(line,'$.loading_time') loading_time,
        get_json_object(line,'$.detail') detail,
        get_json_object(line,'$.extend1') extend1
    from ods_start_log 
    where dt='2021-09-16';
    

    查询是否导入成功

    hive (gmall)> select * from dwd_start_log limit 2;
    

    DWD层启动表加载数据脚本

    DWD 事件表数据解析

    创建基础明细表

    hive (gmall)> 
    drop table if exists dwd_base_event_log;
    CREATE EXTERNAL TABLE dwd_base_event_log(
    `mid_id` string,
    `user_id` string, 
    `version_code` string, 
    `version_name` string, 
    `lang` string, 
    `source` string, 
    `os` string, 
    `area` string, 
    `model` string,
    `brand` string, 
    `sdk_version` string, 
    `gmail` string, 
    `height_width` string, 
    `app_time` string, 
    `network` string, 
    `lng` string, 
    `lat` string, 
    `event_name` string, 
    `event_json` string, 
    `server_time` string)
    PARTITIONED BY (`dt` string)
    stored as parquet
    location '/warehouse/gmall/dwd/dwd_base_event_log/';
    

    自定义UDF函数

    自定义UDTF函数

    将jar包添加到Hive的classpath

    hive (gmall)> add jar /opt/hive-3.1.2/lib/hivefunction-1.0-SNAPSHOT.jar;
    

    添加临时函数

    hive (gmall)> create temporary function base_analizer as 'com.gazikel.udf.BaseFieldUDF';
    
    hive (gmall)> create temporary function flat_analizer as 'com.gazikel.udtf.EventJsonUDTF';
    

    解析事件日志基础明细表

    insert overwrite table dwd_base_event_log 
    PARTITION (dt='2022-02-09')
    select
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    event_name,
    event_json,
    server_time
    from
    (
    select
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[0] as mid_id,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[1] as user_id,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[2] as version_code,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[3] as version_name,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[4] as lang,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[5] as source,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[6] as os,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[7] as area,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[8] as model,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[9] as brand,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[10] as sdk_version,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[11] as gmail,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[12] as height_width,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[13] as app_time,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[14] as network,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[15] as lng,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[16] as lat,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[17] as ops,
    split(base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la'),'\t')[18] as server_time
    from ods_event_log 
    where 
    dt='2022-02-09' 
    and 
    base_analizer(line,'mid,uid,vc,vn,l,sr,os,ar,md,ba,sv,g,hw,t,nw,ln,la')<>'') 
    sdk_log lateral view flat_analizer(ops) tmp_k as event_name, event_json;
    

    测试是否导入成功:

    hive (gmall)> select * from dwd_base_event_log limit 10;
    

    DWD层数据解析脚本

    DWD层事件表获取

    商品点击表

    1. 建表语句
    hive (gmall)> CREATE EXTERNAL TABLE dwd_display_log(
                > `mid_id` string,
                > `user_id` string,
                > `version_code` string,
                > `version_name` string,
                > `lang` string,
                > `source` string,
                > `os` string,
                > `area` string,
                > `model` string,
                > `brand` string,
                > `sdk_version` string,
                > `gmail` string,
                > `height_width` string,
                > `app_time` string,
                > `network` string,
                > `lng` string,
                > `lat` string,
                > `action` string,
                > `goodsid` string,
                > `place` string,
                > `extend1` string,
                > `category` string,
                > `server_time` string
                > )
                > PARTITIONED BY (dt string)
                > location '/warehouse/gmall/dwd/dwd_display_log/';
    
    1. 导入数据
    insert overwrite table dwd_display_log PARTITION (dt='2022-02-08')
    select 
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'$.kv.action') action,
    get_json_object(event_json,'$.kv.goodsid') goodsid,
    get_json_object(event_json,'$.kv.place') place,
    get_json_object(event_json,'$.kv.extend1') extend1,
    get_json_object(event_json,'$.kv.category') category,
    server_time
    from dwd_base_event_log 
    where dt='2022-02-08' and event_name='display';
    

    商品详情页表

    1. 建表语句
    CREATE EXTERNAL TABLE dwd_newsdetail_log(
    `mid_id` string,
    `user_id` string, 
    `version_code` string, 
    `version_name` string, 
    `lang` string, 
    `source` string, 
    `os` string, 
    `area` string, 
    `model` string,
    `brand` string, 
    `sdk_version` string, 
    `gmail` string, 
    `height_width` string, 
    `app_time` string,  
    `network` string, 
    `lng` string, 
    `lat` string, 
    `entry` string,
    `action` string,
    `goodsid` string,
    `showtype` string,
    `news_staytime` string,
    `loading_time` string,
    `type1` string,
    `category` string,
    `server_time` string)
    PARTITIONED BY (dt string)
    location '/warehouse/gmall/dwd/dwd_newsdetail_log/';
    
    
    1. 导入数据
    insert overwrite table dwd_newsdetail_log
    PARTITION (dt='2022-02-09')
    select 
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'$.kv.entry') entry,
    get_json_object(event_json,'$.kv.action') action,
    get_json_object(event_json,'$.kv.goodsid') goodsid,
    get_json_object(event_json,'$.kv.showtype') showtype,
    get_json_object(event_json,'$.kv.news_staytime') news_staytime,
    get_json_object(event_json,'$.kv.loading_time') loading_time,
    get_json_object(event_json,'$.kv.type1') type1,
    get_json_object(event_json,'$.kv.category') category,
    server_time
    from dwd_base_event_log
    where dt='2022-02-09' and event_name='newsdetail';
    

    商品列表页表

    1. 建表语句
    CREATE EXTERNAL TABLE dwd_loading_log(
    `mid_id` string,
    `user_id` string, 
    `version_code` string, 
    `version_name` string, 
    `lang` string, 
    `source` string, 
    `os` string, 
    `area` string, 
    `model` string,
    `brand` string, 
    `sdk_version` string, 
    `gmail` string,
    `height_width` string,  
    `app_time` string,
    `network` string, 
    `lng` string, 
    `lat` string, 
    `action` string,
    `loading_time` string,
    `loading_way` string,
    `extend1` string,
    `extend2` string,
    `type` string,
    `type1` string,
    `server_time` string)
    PARTITIONED BY (dt string)
    location '/warehouse/gmall/dwd/dwd_loading_log/';
    
    1. 导入数据
    insert overwrite table dwd_loading_log
    PARTITION (dt='2022-02-09')
    select 
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'$.kv.action') action,
    get_json_object(event_json,'$.kv.loading_time') loading_time,
    get_json_object(event_json,'$.kv.loading_way') loading_way,
    get_json_object(event_json,'$.kv.extend1') extend1,
    get_json_object(event_json,'$.kv.extend2') extend2,
    get_json_object(event_json,'$.kv.type') type,
    get_json_object(event_json,'$.kv.type1') type1,
    server_time
    from dwd_base_event_log
    where dt='2022-02-09' and event_name='loading';
    

    广告表

    1. 建表语句
    CREATE EXTERNAL TABLE dwd_ad_log(
    `mid_id` string,
    `user_id` string, 
    `version_code` string, 
    `version_name` string, 
    `lang` string, 
    `source` string, 
    `os` string, 
    `area` string, 
    `model` string,
    `brand` string, 
    `sdk_version` string, 
    `gmail` string, 
    `height_width` string,  
    `app_time` string,
    `network` string, 
    `lng` string, 
    `lat` string, 
    `entry` string,
    `action` string,
    `content` string,
    `detail` string,
    `ad_source` string,
    `behavior` string,
    `newstype` string,
    `show_style` string,
    `server_time` string)
    PARTITIONED BY (dt string)
    location '/warehouse/gmall/dwd/dwd_ad_log/';
    
    
    1. 导入数据
    insert overwrite table dwd_ad_log
    PARTITION (dt='2022-02-09')
    select 
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'$.kv.entry') entry,
    get_json_object(event_json,'$.kv.action') action,
    get_json_object(event_json,'$.kv.content') content,
    get_json_object(event_json,'$.kv.detail') detail,
    get_json_object(event_json,'$.kv.source') ad_source,
    get_json_object(event_json,'$.kv.behavior') behavior,
    get_json_object(event_json,'$.kv.newstype') newstype,
    get_json_object(event_json,'$.kv.show_style') show_style,
    server_time
    from dwd_base_event_log 
    where dt='2022-02-09' and event_name='ad';
    

    消息通知表

    1. 建表语句
    CREATE EXTERNAL TABLE dwd_notification_log(
    `mid_id` string,
    `user_id` string, 
    `version_code` string, 
    `version_name` string, 
    `lang` string,
    `source` string, 
    `os` string, 
    `area` string, 
    `model` string,
    `brand` string, 
    `sdk_version` string, 
    `gmail` string, 
    `height_width` string,  
    `app_time` string,
    `network` string, 
    `lng` string, 
    `lat` string, 
    `action` string,
    `noti_type` string,
    `ap_time` string,
    `content` string,
    `server_time` string
    )
    PARTITIONED BY (dt string)
    location '/warehouse/gmall/dwd/dwd_notification_log/';
    
    1. 导入数据
    insert overwrite table dwd_notification_log
    PARTITION (dt='2022-02-09')
    select 
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'$.kv.action') action,
    get_json_object(event_json,'$.kv.noti_type') noti_type,
    get_json_object(event_json,'$.kv.ap_time') ap_time,
    get_json_object(event_json,'$.kv.content') content,
    server_time
    from dwd_base_event_log
    where dt='2022-02-09' and event_name='notification';
    

    用户前台活跃表

    1. 建表语句
    CREATE EXTERNAL TABLE dwd_active_foreground_log(
    `mid_id` string,
    `user_id` string,
    `version_code` string,
    `version_name` string,
    `lang` string,
    `source` string,
    `os` string,
    `area` string,
    `model` string,
    `brand` string,
    `sdk_version` string,
    `gmail` string,
    `height_width` string,
    `app_time` string,
    `network` string,
    `lng` string,
    `lat` string,
    `push_id` string,
    `access` string,
    `server_time` string)
    PARTITIONED BY (dt string)
    location '/warehouse/gmall/dwd/dwd_foreground_log/';
    
    1. 导入数据
    insert overwrite table dwd_active_foreground_log PARTITION (dt='2022-02-09')
    select 
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'$.kv.push_id') push_id,
    get_json_object(event_json,'$.kv.access') access,
    server_time
    from dwd_base_event_log
    where dt='2022-02-09' and event_name='active_foreground';
    

    用户后台活跃表

    1. 建表语句
    CREATE EXTERNAL TABLE dwd_active_background_log(
    `mid_id` string,
    `user_id` string,
    `version_code` string,
    `version_name` string,
    `lang` string,
    `source` string,
    `os` string,
    `area` string,
    `model` string,
    `brand` string,
    `sdk_version` string,
    `gmail` string,
     `height_width` string,
    `app_time` string,
    `network` string,
    `lng` string,
    `lat` string,
    `active_source` string,
    `server_time` string
    )
    PARTITIONED BY (dt string)
    location '/warehouse/gmall/dwd/dwd_background_log/';
    
    1. 导入数据
    insert overwrite table dwd_active_background_log PARTITION (dt='2022-02-09')
    select 
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'$.kv.active_source') active_source,
    server_time
    from dwd_base_event_log
    where dt='2022-02-09' and event_name='active_background';
    

    评论表

    1. 建表语句
    CREATE EXTERNAL TABLE dwd_comment_log(
    `mid_id` string,
    `user_id` string,
    `version_code` string,
    `version_name` string,
    `lang` string,
    `source` string,
    `os` string,
    `area` string,
    `model` string,
    `brand` string,
    `sdk_version` string,
    `gmail` string,
    `height_width` string,
    `app_time` string,
    `network` string,
    `lng` string,
    `lat` string,
    `comment_id` int,
    `userid` int,
    `p_comment_id` int, 
    `content` string,
    `addtime` string,
    `other_id` int,
    `praise_count` int,
    `reply_count` int,
    `server_time` string
    )
    PARTITIONED BY (dt string)
    location '/warehouse/gmall/dwd/dwd_comment_log/';
    
    1. 导入数据
    insert overwrite table dwd_comment_log PARTITION (dt='2019-02-09')
    select 
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'$.kv.comment_id') comment_id,
    get_json_object(event_json,'$.kv.userid') userid,
    get_json_object(event_json,'$.kv.p_comment_id') p_comment_id,
    get_json_object(event_json,'$.kv.content') content,
    get_json_object(event_json,'$.kv.addtime') addtime,
    get_json_object(event_json,'$.kv.other_id') other_id,
    get_json_object(event_json,'$.kv.praise_count') praise_count,
    get_json_object(event_json,'$.kv.reply_count') reply_count,
    server_time
    from dwd_base_event_log
    where dt='2019-02-09' and event_name='comment';
    

    收藏表

    1. 建表语句
    CREATE EXTERNAL TABLE dwd_favorites_log(
    `mid_id` string,
    `user_id` string, 
    `version_code` string, 
    `version_name` string, 
    `lang` string, 
    `source` string, 
    `os` string, 
    `area` string, 
    `model` string,
    `brand` string, 
    `sdk_version` string, 
    `gmail` string, 
    `height_width` string,  
    `app_time` string,
    `network` string, 
    `lng` string, 
    `lat` string, 
    `id` int, 
    `course_id` int, 
    `userid` int,
    `add_time` string,
    `server_time` string
    )
    PARTITIONED BY (dt string)
    location '/warehouse/gmall/dwd/dwd_favorites_log/';
    
    1. 导入数据
    insert overwrite table dwd_favorites_log
    PARTITION (dt='2022-02-09')
    select 
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'$.kv.id') id,
    get_json_object(event_json,'$.kv.course_id') course_id,
    get_json_object(event_json,'$.kv.userid') userid,
    get_json_object(event_json,'$.kv.add_time') add_time,
    server_time
    from dwd_base_event_log 
    where dt='2022-02-09' and event_name='favorites';
    

    点赞表

    1. 建表语句
    CREATE EXTERNAL TABLE dwd_praise_log(
    `mid_id` string,
    `user_id` string, 
    `version_code` string, 
    `version_name` string, 
    `lang` string, 
    `source` string, 
    `os` string, 
    `area` string, 
    `model` string,
    `brand` string, 
    `sdk_version` string, 
    `gmail` string, 
    `height_width` string,  
    `app_time` string,
    `network` string, 
    `lng` string, 
    `lat` string, 
    `id` string, 
    `userid` string, 
    `target_id` string,
    `type` string,
    `add_time` string,
    `server_time` string
    )
    PARTITIONED BY (dt string)
    location '/warehouse/gmall/dwd/dwd_praise_log/';
    
    1. 导入数据
    insert overwrite table dwd_praise_log PARTITION (dt='2022-02-09')
    select 
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'$.kv.id') id,
    get_json_object(event_json,'$.kv.userid') userid,
    get_json_object(event_json,'$.kv.target_id') target_id,
    get_json_object(event_json,'$.kv.type') type,
    get_json_object(event_json,'$.kv.add_time') add_time,
    server_time
    from dwd_base_event_log
    where dt='2022-02-09' and event_name='praise';
    

    错误日志表

    1. 建表语句
    CREATE EXTERNAL TABLE dwd_error_log(
    `mid_id` string,
    `user_id` string, 
    `version_code` string, 
    `version_name` string, 
    `lang` string, 
    `source` string, 
    `os` string, 
    `area` string, 
    `model` string,
    `brand` string, 
    `sdk_version` string, 
    `gmail` string, 
    `height_width` string,  
    `app_time` string,
    `network` string, 
    `lng` string, 
    `lat` string, 
    `errorBrief` string, 
    `errorDetail` string, 
    `server_time` string)
    PARTITIONED BY (dt string)
    location '/warehouse/gmall/dwd/dwd_error_log/';
    
    1. 导入数据
    insert overwrite table dwd_error_log PARTITION (dt='2022-02-09')
    select 
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'$.kv.errorBrief') errorBrief,
    get_json_object(event_json,'$.kv.errorDetail') errorDetail,
    server_time
    from dwd_base_event_log 
    where dt='2022-02-09' and event_name='error';
    

    DWD层事件表加载数据脚本

    在/opt/shell目录下新建dwd_event_log.sh脚本

    $ vim /opt/shell/dwd_event_log.sh
    

    添加如下内容:

    #!/bin/bash
    
    # 定义变量方便修改
    APP=gmall
    hive=/opt/module/hive/bin/hive
    
    # 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
    if [ -n "$1" ] ;then
    	do_date=$1
    else 
    	do_date=`date -d "-1 day" +%F`  
    fi 
    
    sql="
    set hive.exec.dynamic.partition.mode=nonstrict;
    
    insert overwrite table "$APP".dwd_display_log
    PARTITION (dt='$do_date')
    select 
    	mid_id,
    	user_id,
    	version_code,
    	version_name,
    	lang,
    	source,
    	os,
    	area,
    	model,
    	brand,
    	sdk_version,
    	gmail,
    	height_width,
    	app_time,
    	network,
    	lng,
    	lat,
    	get_json_object(event_json,'$.kv.action') action,
    	get_json_object(event_json,'$.kv.goodsid') goodsid,
    	get_json_object(event_json,'$.kv.place') place,
    	get_json_object(event_json,'$.kv.extend1') extend1,
    	get_json_object(event_json,'$.kv.category') category,
    	server_time
    from "$APP".dwd_base_event_log 
    where dt='$do_date' and event_name='display';
    
    
    insert overwrite table "$APP".dwd_newsdetail_log
    PARTITION (dt='$do_date')
    select 
    	mid_id,
    	user_id,
    	version_code,
    	version_name,
    	lang,
    	source,
    	os,
    	area,
    	model,
    	brand,
    	sdk_version,
    	gmail,
    	height_width,
    	app_time,
    	network,
    	lng,
    	lat,
    	get_json_object(event_json,'$.kv.entry') entry,
    	get_json_object(event_json,'$.kv.action') action,
    	get_json_object(event_json,'$.kv.goodsid') goodsid,
    	get_json_object(event_json,'$.kv.showtype') showtype,
    	get_json_object(event_json,'$.kv.news_staytime') news_staytime,
    	get_json_object(event_json,'$.kv.loading_time') loading_time,
    	get_json_object(event_json,'$.kv.type1') type1,
    	get_json_object(event_json,'$.kv.category') category,
    	server_time
    from "$APP".dwd_base_event_log 
    where dt='$do_date' and event_name='newsdetail';
    
    
    insert overwrite table "$APP".dwd_loading_log
    PARTITION (dt='$do_date')
    select 
    	mid_id,
    	user_id,
    	version_code,
    	version_name,
    	lang,
    	source,
    	os,
    	area,
    	model,
    	brand,
    	sdk_version,
    	gmail,
    	height_width,
    	app_time,
    	network,
    	lng,
    	lat,
    	get_json_object(event_json,'$.kv.action') action,
    	get_json_object(event_json,'$.kv.loading_time') loading_time,
    	get_json_object(event_json,'$.kv.loading_way') loading_way,
    	get_json_object(event_json,'$.kv.extend1') extend1,
    	get_json_object(event_json,'$.kv.extend2') extend2,
    	get_json_object(event_json,'$.kv.type') type,
    	get_json_object(event_json,'$.kv.type1') type1,
    	server_time
    from "$APP".dwd_base_event_log 
    where dt='$do_date' and event_name='loading';
    
    
    insert overwrite table "$APP".dwd_ad_log
    PARTITION (dt='$do_date')
    select 
    	mid_id,
    	user_id,
    	version_code,
    	version_name,
    	lang,
    	source,
    	os,
    	area,
    	model,
    	brand,
    	sdk_version,
    	gmail,
    	height_width,
    	app_time,
    	network,
    	lng,
    	lat,
    	get_json_object(event_json,'$.kv.entry') entry,
    	get_json_object(event_json,'$.kv.action') action,
    	get_json_object(event_json,'$.kv.content') content,
    	get_json_object(event_json,'$.kv.detail') detail,
    	get_json_object(event_json,'$.kv.source') ad_source,
    	get_json_object(event_json,'$.kv.behavior') behavior,
    	get_json_object(event_json,'$.kv.newstype') newstype,
    	get_json_object(event_json,'$.kv.show_style') show_style,
    	server_time
    from "$APP".dwd_base_event_log 
    where dt='$do_date' and event_name='ad';
    
    
    insert overwrite table "$APP".dwd_notification_log
    PARTITION (dt='$do_date')
    select 
    	mid_id,
    	user_id,
    	version_code,
    	version_name,
    	lang,
    	source,
    	os,
    	area,
    	model,
    	brand,
    	sdk_version,
    	gmail,
    	height_width,
    	app_time,
    	network,
    	lng,
    	lat,
    	get_json_object(event_json,'$.kv.action') action,
    	get_json_object(event_json,'$.kv.noti_type') noti_type,
    	get_json_object(event_json,'$.kv.ap_time') ap_time,
    	get_json_object(event_json,'$.kv.content') content,
    	server_time
    from "$APP".dwd_base_event_log 
    where dt='$do_date' and event_name='notification';
    
    
    insert overwrite table "$APP".dwd_active_foreground_log
    PARTITION (dt='$do_date')
    select 
    	mid_id,
    	user_id,
    	version_code,
    	version_name,
    	lang,
    	source,
    	os,
    	area,
    	model,
    	brand,
    	sdk_version,
    	gmail,
    	height_width,
    	app_time,
    	network,
    	lng,
    	lat,
    get_json_object(event_json,'$.kv.push_id') push_id,
    get_json_object(event_json,'$.kv.access') access,
    	server_time
    from "$APP".dwd_base_event_log 
    where dt='$do_date' and event_name='active_foreground';
    
    
    insert overwrite table "$APP".dwd_active_background_log
    PARTITION (dt='$do_date')
    select 
    	mid_id,
    	user_id,
    	version_code,
    	version_name,
    	lang,
    	source,
    	os,
    	area,
    	model,
    	brand,
    	sdk_version,
    	gmail,
    	height_width,
    	app_time,
    	network,
    	lng,
    	lat,
    	get_json_object(event_json,'$.kv.active_source') active_source,
    	server_time
    from "$APP".dwd_base_event_log 
    where dt='$do_date' and event_name='active_background';
    
    
    insert overwrite table "$APP".dwd_comment_log
    PARTITION (dt='$do_date')
    select 
    	mid_id,
    	user_id,
    	version_code,
    	version_name,
    	lang,
    	source,
    	os,
    	area,
    	model,
    	brand,
    	sdk_version,
    	gmail,
    	height_width,
    	app_time,
    	network,
    	lng,
    	lat,
    	get_json_object(event_json,'$.kv.comment_id') comment_id,
    	get_json_object(event_json,'$.kv.userid') userid,
    	get_json_object(event_json,'$.kv.p_comment_id') p_comment_id,
    	get_json_object(event_json,'$.kv.content') content,
    	get_json_object(event_json,'$.kv.addtime') addtime,
    	get_json_object(event_json,'$.kv.other_id') other_id,
    	get_json_object(event_json,'$.kv.praise_count') praise_count,
    	get_json_object(event_json,'$.kv.reply_count') reply_count,
    	server_time
    from "$APP".dwd_base_event_log 
    where dt='$do_date' and event_name='comment';
    
    
    insert overwrite table "$APP".dwd_favorites_log
    PARTITION (dt='$do_date')
    select 
    	mid_id,
    	user_id,
    	version_code,
    	version_name,
    	lang,
    	source,
    	os,
    	area,
    	model,
    	brand,
    	sdk_version,
    	gmail,
    	height_width,
    	app_time,
    	network,
    	lng,
    	lat,
    	get_json_object(event_json,'$.kv.id') id,
    	get_json_object(event_json,'$.kv.course_id') course_id,
    	get_json_object(event_json,'$.kv.userid') userid,
    	get_json_object(event_json,'$.kv.add_time') add_time,
    	server_time
    from "$APP".dwd_base_event_log 
    where dt='$do_date' and event_name='favorites';
    
    
    insert overwrite table "$APP".dwd_praise_log
    PARTITION (dt='$do_date')
    select 
    	mid_id,
    	user_id,
    	version_code,
    	version_name,
    	lang,
    	source,
    	os,
    	area,
    	model,
    	brand,
    	sdk_version,
    	gmail,
    	height_width,
    	app_time,
    	network,
    	lng,
    	lat,
    	get_json_object(event_json,'$.kv.id') id,
    	get_json_object(event_json,'$.kv.userid') userid,
    	get_json_object(event_json,'$.kv.target_id') target_id,
    	get_json_object(event_json,'$.kv.type') type,
    	get_json_object(event_json,'$.kv.add_time') add_time,
    	server_time
    from "$APP".dwd_base_event_log 
    where dt='$do_date' and event_name='praise';
    
    
    insert overwrite table "$APP".dwd_error_log
    PARTITION (dt='$do_date')
    select 
    	mid_id,
    	user_id,
    	version_code,
    	version_name,
    	lang,
    	source,
    	os,
    	area,
    	model,
    	brand,
    	sdk_version,
    	gmail,
    	height_width,
    	app_time,
    	network,
    	lng,
    	lat,
    	get_json_object(event_json,'$.kv.errorBrief') errorBrief,
    	get_json_object(event_json,'$.kv.errorDetail') errorDetail,
    	server_time
    from "$APP".dwd_base_event_log 
    where dt='$do_date' and event_name='error';
    "
    
    $hive -e "$sql"
    

    给脚本添加可执行权限:

    $ chmod 777 /opt/shell/dwd_event_log.sh
    
  • 相关阅读:
    day6 面向对象(2)
    day5 面向对象
    day4 函数重载
    sqlserver 存储过程 增加
    sqlserver 存储过程 修改
    sqlserver 存储过程 删除
    sqlserver 存储过程 查询
    上篇: php 微信公众号 基于Thinkphp3.2框架开发
    bzoj 2726: [SDOI2012]任务安排
    bzoj 4199 [NOI2015]寿司晚宴
  • 原文地址:https://www.cnblogs.com/Gazikel/p/15878204.html
Copyright © 2020-2023  润新知