流失用户:最近7天未登录我们称之为流失用户
17.1 DWS层
使用日活明细表dws_uv_detail_day作为DWS层数据
17.2 ADS层
1)建表语句
drop table if exists ads_wastage_count;
create external table ads_wastage_count(
`dt` string COMMENT '统计日期',
`wastage_count` bigint COMMENT '流失设备数'
)
row format delimited fields terminated by ' '
location '/warehouse/gmall/ads/ads_wastage_count';
2)导入2019-02-20数据
insert into table ads_wastage_count
select
'2019-02-20',
count(*)
from
(
select mid_id
from dws_uv_detail_day
group by mid_id
having max(dt)<=date_add('2019-02-20',-7)
)t1;
17.3 编写脚本
1)创建脚本
[kgg@hadoop102 bin]$ vim ads_wastage_log.sh
在脚本中编写如下内容
#!/bin/bash
if [ -n "$1" ];then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi
hive=/opt/module/hive/bin/hive
APP=gmall
echo "-----------导入日期$do_date-----------"
sql="
insert into table "$APP".ads_wastage_count
select
'$do_date',
count(*)
from
(
select mid_id
from "$APP".dws_uv_detail_day
group by mid_id
having max(dt)<=date_add('$do_date',-7)
)t1;"
$hive -e "$sql"
2)增加脚本执行权限
chmod 777 ads_wastage_log.sh
3)脚本使用
ads_wastage_log.sh 2019-02-20
4)查询结果
select * from ads_wastage_count;
5)脚本执行时间
企业开发中一般在每日凌晨30分~1点
第18章 需求七:最近连续三周活跃用户数
最近3周连续活跃的用户:通常是周一对前3周的数据做统计,该数据一周计算一次。
18.1 DWS层
使用周活明细表dws_uv_detail_wk作为DWS层数据
18.2 ADS层
1)建表语句
drop table if exists ads_continuity_wk_count;
create external table ads_continuity_wk_count(
`dt` string COMMENT '统计日期,一般用结束周周日日期,如果每天计算一次,可用当天日期',
`wk_dt` string COMMENT '持续时间',
`continuity_count` bigint
)
row format delimited fields terminated by ' '
location '/warehouse/gmall/ads/ads_continuity_wk_count';
2)导入2019-02-20所在周的数据
insert into table ads_continuity_wk_count
select
'2019-02-20',
concat(date_add(next_day('2019-02-20','MO'),-7*3),'_',date_add(next_day('2019-02-20','MO'),-1)),
count(*)
from
(
select mid_id
from dws_uv_detail_wk
where wk_dt>=concat(date_add(next_day('2019-02-20','MO'),-7*3),'_',date_add(next_day('2019-02-20','MO'),-7*2-1))
and wk_dt<=concat(date_add(next_day('2019-02-20','MO'),-7),'_',date_add(next_day('2019-02-20','MO'),-1))
group by mid_id
having count(*)=3
)t1;
3)查询
select * from ads_continuity_wk_count;
18.3 编写脚本
1)创建脚本
[kgg@hadoop102 bin]$ vim ads_continuity_wk_log.sh
在脚本中编写如下内容
#!/bin/bash
if [ -n "$1" ];then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi
hive=/opt/module/hive/bin/hive
APP=gmall
echo "-----------导入日期$do_date-----------"
sql="
insert into table "$APP".ads_continuity_wk_count
select
'$do_date',
concat(date_add(next_day('$do_date','MO'),-7*3),'_',date_add(next_day('$do_date','MO'),-1)),
count(*)
from
(
select mid_id
from "$APP".dws_uv_detail_wk
where wk_dt>=concat(date_add(next_day('$do_date','MO'),-7*3),'_',date_add(next_day('$do_date','MO'),-7*2-1))
and wk_dt<=concat(date_add(next_day('$do_date','MO'),-7),'_',date_add(next_day('$do_date','MO'),-1))
group by mid_id
having count(*)=3
)t1;"
$hive -e "$sql"
2)增加脚本执行权限
chmod 777 ads_continuity_wk_log.sh
3)脚本使用
ads_continuity_wk_log.sh 2019-02-20
4)查询结果
select * from ads_continuity_wk_count;
5)脚本执行时间
企业开发中一般在每周一凌晨30分~1点
第19章 需求八:最近七天内连续三天活跃用户数
说明:最近7天内连续3天活跃用户数
19.1 DWS层
使用日活明细表dws_uv_detail_day作为DWS层数据
19.2 ADS层
1)建表语句
drop table if exists ads_continuity_uv_count;
create external table ads_continuity_uv_count(
`dt` string COMMENT '统计日期',
`wk_dt` string COMMENT '最近7天日期',
`continuity_count` bigint
) COMMENT '连续活跃设备数'
row format delimited fields terminated by ' '
location '/warehouse/gmall/ads/ads_continuity_uv_count';
2)写出导入数据的SQL语句
insert into table ads_continuity_uv_count
select
'2019-02-12',
concat(date_add('2019-02-12',-6),'_','2019-02-12'),
count(*)
from
(
select mid_id
from
(
select mid_id
from
(
select
mid_id,
date_sub(dt,rank) date_dif
from
(
select
mid_id,
dt,
rank() over(partition by mid_id order by dt) rank
from dws_uv_detail_day
where dt>=date_add('2019-02-12',-6) and dt<='2019-02-12'
)t1
)t2
group by mid_id,date_dif
having count(*)>=3
)t3
group by mid_id
)t4;
3)查询
select * from ads_continuity_uv_count;
19.3 编写脚本
1)创建脚本
[kgg@hadoop102 bin]$ vim ads_continuity_log.sh
在脚本中编写如下内容
2)增加脚本执行权限
chmod 777 ads_continuity_log.sh
3)脚本使用
ads_continuity_log.sh 2019-02-12
4)查询结果
select * from ads_continuity_uv_count;