1 手写HQL 第1题
表结构:uid,subject_id,score
求:找出所有科目成绩都大于某一学科平均成绩的学生
数据集如下
1001 01 90 1001 02 90 1001 03 90 1002 01 85 1002 02 85 1002 03 70 1003 01 70 1003 02 70 1003 03 85
1)建表语句
create table score( uid string, subject_id string, score int) row format delimited fields terminated by ' ';
2)求出每个学科平均成绩
select uid, score, avg(score) over(partition by subject_id) avg_score from score;t1
3)根据是否大于平均成绩记录flag,大于则记为0否则记为1
select uid, if(score>avg_score,0,1) flag from t1;t2
4)根据学生id进行分组统计flag的和,和为0则是所有学科都大于平均成绩
select uid from t2 group by uid having sum(flag)=0;
5)最终SQL
select uid from (select uid, if(score>avg_score,0,1) flag from (select uid, score, avg(score) over(partition by subject_id) avg_score from score)t1)t2 group by uid having sum(flag)=0;
2 手写HQL 第2题
我们有如下的用户访问数据
userId |
visitDate |
visitCount |
u01 |
2017/1/21 |
5 |
u02 |
2017/1/23 |
6 |
u03 |
2017/1/22 |
8 |
u04 |
2017/1/20 |
3 |
u01 |
2017/1/23 |
6 |
u01 |
2017/2/21 |
8 |
U02 |
2017/1/23 |
6 |
U01 |
2017/2/22 |
4 |
要求使用SQL统计出每个用户的累积访问次数,如下表所示:
用户id |
月份 |
小计 |
累积 |
u01 |
2017-01 |
11 |
11 |
u01 |
2017-02 |
12 |
23 |
u02 |
2017-01 |
12 |
12 |
u03 |
2017-01 |
8 |
8 |
u04 |
2017-01 |
3 |
3 |
数据集
u01 2017/1/21 5
u02 2017/1/23 6
u03 2017/1/22 8
u04 2017/1/20 3
u01 2017/1/23 6
u01 2017/2/21 8
u02 2017/1/23 6
u01 2017/2/22 4
1)创建表
create table action
(userId string,
visitDate string,
visitCount int)
row format delimited fields terminated by " ";
2)修改数据格式
select
userId,
date_format(regexp_replace(visitDate,'/','-'),'yyyy-MM') mn,
visitCount
from
action;t1
3)计算每人单月访问量
select
userId,
mn,
sum(visitCount) mn_count
from
t1
group by
userId,mn;t2
4)按月累计访问量
select
userId,
mn,
mn_count,
sum(mn_count) over(partition by userId order by mn)
from t2;
5)最终SQL
select userId, mn, mn_count, sum(mn_count) over(partition by userId order by mn) from ( select userId, mn, sum(visitCount) mn_count from (select userId, date_format(regexp_replace(visitDate,'/','-'),'yyyy-MM') mn, visitCount from action)t1 group by userId,mn)t2;
3 手写HQL 第3题
有50W个京东店铺,每个顾客访客访问任何一个店铺的任何一个商品时都会产生一条访问日志,访问日志存储的表名为Visit,访客的用户id为user_id,被访问的店铺名称为shop,请统计:
1)每个店铺的UV(访客数)
2)每个店铺访问次数top3的访客信息。输出店铺名称、访客id、访问次数
数据集
u1 a
u2 b
u1 b
u1 a
u3 c
u4 b
u1 a
u2 c
u5 b
u4 b
u6 c
u2 c
u1 b
u2 a
u2 a
u3 a
u5 a
u5 a
u5 a
1)建表
create table visit(user_id string,shop string) row format delimited fields terminated by ' ';
2)每个店铺的UV(访客数)
select shop,count(distinct user_id) from visit group by shop;
3)每个店铺访问次数top3的访客信息。输出店铺名称、访客id、访问次数
(1)查询每个店铺被每个用户访问次数
select shop,user_id,count(*) ct
from visit
group by shop,user_id;t1
(2)计算每个店铺被用户访问次数排名
select shop,user_id,ct,rank() over(partition by shop order by ct) rk
from t1;t2
(3)取每个店铺排名前3的
select shop,user_id,ct
from t2
where rk<=3;
(4)最终SQL