• dws_uv_detail_daycount


    按mid_id统计,每个用户可能通过多个账号进行操作,需要使用 collect_set函数进行收集

    sql:

    drop table if exists dws_uv_detail_daycount; 
    create external table dws_uv_detail_daycount 
    ( 
        `mid_id` string COMMENT '设备唯一标识', 
        `user_id` string COMMENT '用户标识', 
        `version_code` string COMMENT '程序版本号', 
        `version_name` string COMMENT '程序版本名', 
        `lang` string COMMENT '系统语言', 
        `source` string COMMENT '渠道号', 
        `os` string COMMENT '安卓系统版本', 
        `area` string COMMENT '区域', 
        `model` string COMMENT '手机型号', 
        `brand` string COMMENT '手机品牌', 
        `sdk_version` string COMMENT 'sdkVersion', 
        `gmail` string COMMENT 'gmail', 
        `height_width` string COMMENT '屏幕宽高', 
        `app_time` string COMMENT '客户端日志产生时的时间', 
        `network` string COMMENT '网络模式', 
        `lng` string COMMENT '经度', 
        `lat` string COMMENT '纬度', 
        `login_count` bigint COMMENT '活跃次数' 
    )
    partitioned by(dt string) stored as parquet 
    location '/ecdw/dws/dws_uv_detail_daycount';
    
    
    --插入
    insert overwrite table dws_uv_detail_daycount 
    partition(dt='2020-03-12') 
    select 
        mid_id, 
        concat_ws('|', collect_set(user_id)) user_id, 
        concat_ws('|', collect_set(version_code)) version_code, 
        concat_ws('|', collect_set(version_name)) version_name, 
        concat_ws('|', collect_set(lang))lang, concat_ws('|', collect_set(source)) source, 
        concat_ws('|', collect_set(os)) os, concat_ws('|', collect_set(area)) area, 
        concat_ws('|', collect_set(model)) model, concat_ws('|', collect_set(brand)) brand, 
        concat_ws('|', collect_set(sdk_version)) sdk_version, concat_ws('|', collect_set(gmail)) gmail, 
        concat_ws('|', collect_set(height_width)) height_width, concat_ws('|', collect_set(app_time)) app_time, 
        concat_ws('|', collect_set(network)) network, concat_ws('|', collect_set(lng)) lng, 
        concat_ws('|', collect_set(lat)) lat, count(*) login_count 
    from dwd_start_log 
    where dt='2020-03-12' 
    group by mid_id;
  • 相关阅读:
    网页css效果调试技巧
    font: 300 12px/24px "宋体",arial,serif;
    php调试心得
    linux的vim命令介绍和其他命令
    wamp提示can't find driver 针对mysql数据库
    调试yii程序php页面显示中文
    【rgw | 运维】部署rgw
    【ceph | 运维】pool相关命令
    【ceph | 运维】application not enabled 的解决方法
    【ceph | 运维】nautilus版本编译
  • 原文地址:https://www.cnblogs.com/ldy233/p/14437158.html
Copyright © 2020-2023  润新知