• 项目实战从0到1之hive(46)大数据项目之电商数仓(用户行为数据)(十四)


    第23章 需求十二:统计每日各类别下点击次数top10的商品

    23.1 DWS层

    使用点击日志表作为DWS层数据源

    23.2 ADS层

    23.2.1 建表语句

    drop table if exists ads_goods_display_top10;
    create external table ads_goods_display_top10 (
      `dt` string COMMENT '日期',
      `category` string COMMENT '品类',
      `goodsid` string COMMENT '商品id',
      `goods_count` string COMMENT '商品点击次数'
    )
    row format delimited fields terminated by ' '
    location '/warehouse/gmall/ads/ads_goods_display_top10';

    23.2.2 导入数据

    insert into table ads_goods_display_top10
    select
     '2019-10-03',
    category,
    goodsid,
     count
    from(
     select
      category,
      goodsid,
       count,
      rank() over(partition by category order by count desc) rk
     from(
       select
        category,
        goodsid,
         count(*) count
       from dwd_display_log
       where dt='2019-10-03' and action=2
       group by category, goodsid
    )t1
    )t2
    where rk<=10;

    23.2.3 导入数据脚本

    1)创建脚本ads_goods_display_top10.sh

    vim ads_goods_display_top10.sh
    #!/bin/bash
    db=gmall
    hive=/opt/module/hive/bin/hive
    hadoop=/opt/module/hadoop/bin/hadoop

    if [[ -n $1 ]]; then
       do_date=$1
    else
       do_date=`date -d '-1 day' +%F`
    fi

    sql="
    use gmall;
    insert into table ads_goods_display_top10
    select
     '$do_date',
    category,
    goodsid,
    count
    from(
    select
      category,
      goodsid,
      count,
      rank() over(partition by category order by count desc) rk
    from(
      select
        category,
        goodsid,
        count(*) count
      from dwd_display_log
      where dt='$do_date' and action=2
      group by category, goodsid
    )t1
    )t2
    where rk<=10
    "

    $hive -e "$sql"

    2)增加脚本执行权限

    chmod 777 ads_goods_display_top10.sh

    3)脚本使用

    ads_goods_display_top10.sh 2019-02-20

    4)查询结果

    select * from ads_goods_display_top10;

    5)脚本执行时间

    企业开发中一般在每天凌晨30分~1点

    第24章 需求十三:点击次数最多的10个用户点击的商品次数top10

    24.1 DWS层

    使用日志数据用户行为宽表作为DWS层表

    24.2 ADS层

    24.2.1 建表语句

    drop table if exists ads_goods_user_count;
    create external table ads_goods_user_count(
    `dt` string COMMENT '统计日期',
    `mid_id` string COMMENT '用户id',
      `u_ct` string COMMENT '用户总点击次数',
      `goodsid` string COMMENT '商品id',
      `d_ct` string COMMENT '各个商品点击次数'
    )
    row format delimited fields terminated by ' '
    location '/warehouse/gmall/ads/ads_goods_user_count';

    24.2.2 导入数据

    insert into table ads_goods_user_count
    select
     '2019-10-03',
    mid_id,
    u_ct,
    goodsid,
    d_ct
    from(
     select
      mid_id,
      u_ct,
      goodsid,
      d_ct,
      row_number() over(partition by mid_id order by d_ct desc ) rn
     from(
       select
        dl.mid_id,
        u_ct,
        dl.goodsid,
         count(*) d_ct
       from dwd_display_log dl join (
         select
          mid_id,
           count(*) u_ct
         from dws_user_action_wide_log
         group by mid_id
         order by u_ct desc
         limit 10
      )t1
       on dl.mid_id=t1.mid_id
       group by dl.mid_id, u_ct, dl.goodsid
    ) t2
    ) t3
    where rn<=10

    24.2.3 导入数据脚本

    1)创建脚本ads_goods_user_count.sh

    vim ads_goods_user_count.sh
    #!/bin/bash
    db=gmall
    hive=/opt/module/hive/bin/hive
    hadoop=/opt/module/hadoop/bin/hadoop

    if [[ -n $1 ]]; then
       do_date=$1
    else
       do_date=`date -d '-1 day' +%F`
    fi

    sql="
    use gmall;
    insert into table ads_goods_user_count
    select
     '$do_date',
    mid_id,
    u_ct,
    goodsid,
    d_ct
    from(
    select
      mid_id,
      u_ct,
      goodsid,
      d_ct,
      row_number() over(partition by mid_id order by d_ct desc ) rn
    from(
      select
        dl.mid_id,
        u_ct,
        dl.goodsid,
        count(*) d_ct
      from dwd_display_log dl join (
        select
          mid_id,
          count(*) u_ct
        from dws_user_action_wide_log
        group by mid_id
        order by u_ct desc
        limit 10
      )t1
    on dl.mid_id=t1.mid_id
    group by dl.mid_id, u_ct, dl.goodsid
    ) t2
    ) t3
    where rn<=10
    "

    $hive -e "$sql"

    2)增加脚本执行权限

    chmod 777 ads_goods_user_count.sh

    3)脚本使用

    ads_goods_user_count.sh 2019-02-20

    4)查询结果

    select * from ads_goods_user_count;

    5)脚本执行时间 企业开发中一般在每天凌晨30分~1点

    第25章 需求十四:月活跃率

    月活跃用户与截止到该月累计的用户总和之间的比例

    25.1 DWS层

    使用DWS层月活表以及ADS新增用户表作为DWS层

    25.2 ADS层

    25.2.1 建表语句

    drop table if exists ads_mn_ratio_count;
    create external table ads_mn_ratio_count(
      `dt` string COMMENT '统计日期',
      `mn` string COMMENT '统计月活跃率的月份',
      `ratio` string COMMENT '活跃率'
    )
    row format delimited fields terminated by ' '
    location '/warehouse/gmall/ads/ads_mn_ratio_count';

    25.2.2 导入数据

    insert into table ads_mn_ratio_count
    select
       '2019-10-03',
      date_format('2019-10-03','yyyy-MM'),
      mn_count/sum_user*100 mn_percent
    from
    (select count(*) mn_count from dws_uv_detail_mn where mn=date_format('2019-10-03','yyyy-MM')) t1,
    (select sum(new_mid_count) sum_user from ads_new_mid_count) t2;

    25.2.3 导入数据脚本

    1)创建脚本ads_mn_ratio_count.sh

    vim ads_mn_ratio_count.sh
    #!/bin/bash

    db=gmall
    hive=/opt/module/hive/bin/hive
    hadoop=/opt/module/hadoop/bin/hadoop

    if [[ -n $1 ]]; then
       do_date=$1
    else
       do_date=`date -d '-1 day' +%F`
    fi

    sql="
    use gmall;
    insert into table ads_mn_ratio_count
    select
       '$do_date',
      date_format('$do_date','yyyy-MM'),
      mn_count/sum_user*100 mn_percent
    from
    (select count(*) mn_count from dws_uv_detail_mn where mn=date_format('$do_date','yyyy-MM')) t1,
    (select sum(new_mid_count) sum_user from ads_new_mid_count) t2;
    "

    $hive -e "$sql"

    2)增加脚本执行权限

    chmod 777 ads_mn_ratio_count.sh

    3)脚本使用

    ads_mn_ratio_count 2019-02-20

    4)查询结果

    select * from ads_mn_ratio_count;

    5)脚本执行时间

    企业开发中一般在每天凌晨30分~1点

     

    作者:大码王

    -------------------------------------------

    个性签名:独学而无友,则孤陋而寡闻。做一个灵魂有趣的人!

    如果觉得这篇文章对你有小小的帮助的话,记得在右下角点个“推荐”哦,博主在此感谢!

    万水千山总是情,打赏一分行不行,所以如果你心情还比较高兴,也是可以扫码打赏博主,哈哈哈(っ•?ω•?)っ???!

  • 相关阅读:
    RHCE
    Linux firewalld 防火墙
    ArcGIS案例学习笔记2_2
    HP LaserJet MFP M227_M231双面打印
    罗伯特•盖洛博士(Dr. Robert Charles Gallo)是世界著名的美国生物医学家,他以共同发现了人类免疫缺陷病毒(HIV)――这一导致获得性免疫缺陷综合症(AIDS)的致病源而闻名于世。
    砸向芯片科研的巨额经费去了哪儿?(转)
    tensorflow学习资料
    必备 .NET
    c# 7 vs2017 tuple
    ArcGIS 复制要素
  • 原文地址:https://www.cnblogs.com/huanghanyu/p/14308842.html
Copyright © 2020-2023  润新知