• Hive sql


    GROUP BY 语句
        合计函数 (比如 SUM) 常常需要添加 GROUP BY 语句。
        GROUP BY 语句用于结合合计函数,根据一个或多个列对结果集进行分组。
    
    
    HAVING 子句
        在 SQL 中增加 HAVING 子句原因是,WHERE 关键字无法与合计函数一起使用。
    WHERE子句:在分组之前使用,表示从所有数据中筛选出部分数据,已完成分组的要求,在WHERE子句中不允许使用统计函数,没有GROUP BY子句也可以使用。
    
    HAVING子句:实在分组之后使用的,表示对分组统计后的数据执行再次过滤,可以使用统计函数,有GROUP BY子句之后才可以出现HAVING子句。
    HAVING 使用统计函数后,select 也必须 筛选这个统计函数。
    
    SELECT * FROM table GROUP BY x HAVING * 
    desc function extended from_unixtime; 
    
    日期函数
    
    from_unixtime 
        将时间戳变为指定格式的日期
        
    
    unix_timestamp
        将时间转换为时间戳
        
        
    datediff(d1,d2) 
        计算日期间隔 d1-d2
        
        
    to_date
        将长日期转化为短日期
        将年月日 时分秒 转换为 去掉时分秒只有年月日的格式。
        
        
    year(dt)
    
        dt的格式为 yyyy-MM-dd
        另一种方法 dt BETWEEN '' AND '',很明显,直接用year更简单。
        
    date_add
    
    
    date_sub
    
    
    
    条件函数
    
    CASE WHEN
        语法:CASE WHEN 'x1' THEN 'y1'
                   WHEN 'x2' THEN 'y2'
                   ELSE 'y3'
               END
               
        
        SELECT CASE WHEN age<20 THEN '20岁以下'
        WHEN age BETWEEN 20 AND 30 THEN '20-30岁'
        ELSE '其它岁数' END,count(distinct user_id)
        FROM user_info
        GROUP BY CASE WHEN age<20 THEN '20岁以下'
        WHEN age BETWEEN 20 AND 30 THEN '20-30岁'
        ELSE '其它岁数' END ;
    
        这个例子说明了CASE WHEN 可以作为group by的条件。作为group by的参数,实际上是将某一列的数据进一步细分了。
        
        
        
    if 
        语法: if(条件,条件True的值,条件False的值)
        
        SELECT sex,if(level>5,'高级','低级'),count(user_id)
        FROM user_info
        GROUP BY sex,if(level>5,'高级','低级');
        
        这里的应用也是作为group by的一个条件。level本身就是数据的某一列。
        
        
        
        
        
    字符串函数
        
        substr 字符串截取
        
        
        
        
        
    特殊类型处理
    
        extra1  string,
        extra2  map<string,string>)
            从数据库中读出来都是字典样式,但是取值的方法却由于类型的不同而不同。
            
            extra1     
                get_json_object(extra1,'$.phonebrand')
                    注:get_json_object(需要解析的json字段,用$.key取出想要获取的value)
            extra2 
                extra2['phonebrand']

    练手的HQL

    SELECT a.user_name FROM (SELECT DISTINCT user_name FROM user_trade WHERE year(dt)=2017)a 
        INNER JOIN (SELECT DISTINCT user_name FROM user_trade WHERE year(dt)=2018)b
        ON a.user_name=b.user_name;
        1 使用内置函数,对分区字段取值。
        2 表需要重命名
        3 一个等号
        
    SELECT a.user_name FROM (SELECT user_name FROM trade_2017)a
        INNER JOIN (SELECT user_name FROM trade_2018)b ON a.user_name=b.user_name
        INNER JOIN (SELECT user_name FROM trade_2019)c ON b.user_name=c.user_name;
        1 多张子表关联,注意格式。
    
    SELECT b.name FROM (SELECT distinct user_name from list1)a left join 
    (SELECT distinct user_name from list2)b on a.user_name=b.name where b.name is null;
        1 判断是否为null,用is,不是=
    
    
    SELECT b. education,count(b.user_name) 
    FROM (SELECT DISTINCT user_name FROM user_trade WHERE year(dt)=2019)a 
    INNER JOIN (SELECT DISTINCT user_name,extra2[‘education’] as education FROM user_info) b 
    ON a.user_name=b.user_name 
    GROUP BY b.education;
    
    
    SELECT c.user_name FROM (SELECT DISTINCT user_name FROM trade_2017)a
    INNER JOIN (SELECT DISTINCT user_name FROM trade_2018)b 
        ON a.user_name=b.user_name
        LEFT JOIN (SELECT DISTINCT user_name FROM trade_2019)c
        ON b.user_name=c.user_name 
    WHERE c.user_name is null;
        
    
    
    UNION ALL
        不去重不排序
        1 需要对表重命名。
        2 需要筛选后,才能UNION
    
        SELECT DISTINCT c.user_name,COUNT(c.user_name)
    FROM (SELECT DISTINCT user_name FROM trade_2017
    UNION ALL
    SELECT DISTINCT user_name FROM trade_2018)c;
    UNION 
        去重排序
    
    
    
    
    
    SELECT a.user_name,IF(a.sum_pay is null,0,a,sum_pay),if(b.sum_refund is null,0,b.sum_refund)
    FROM 
     (SELECT user_name,SUM(pay_amount) as sum_pay FROM user_trade WHERE year(dt)=2017 GROUP BY user_name)a 
    FULL JOIN 
    (SELECT user_name,SUM(refund_amount) as sum_refund FROM user_refund WHERE year(dt)=2017 GROUP BY user_name)b
    ON a.user_name=b.user_name
    GROUP BY a.user_name 
    
    
    
    
    
    SELECT a.user_name,a.sum_pay,if(b.sum_refund is null,0,b.sum_refund)
    FROM
    (SELECT user_name,SUM(pay_amount) as sum_pay FROM user_trade WHERE year(dt)=2017 GROUP BY user_name)a
    LEFT JOIN
    (SELECT user_name,SUM(refund_amount) as sum_refund FROM user_refund WHERE year(dt)=2017 GROUP BY user_name)b
    ON a.user_name=b.user_name
    
    
    
    SELECT     CASE
            WHEN age<20 THEN ‘青年’
            WHEN age BETWEEN 20 AND 60 THEN ‘中年’
            ELSE ‘老年’
        END,COUNT(a,user_name)
    FROM 
    (SELECT DISTINCT user_name,age FROM user_info WHERE year(firstactivetime)=2017) a
    JOIN
    (SELECT DISTINCT user_name,pay_amount FROM user_trade) b
    ON a.user_name=b.user_name 
    WHERE b.pay_amount is 0
    GROUP BY 
        CASE
            WHEN age<20 THEN ‘青年’
            WHEN age BETWEEN 20 AND 60 THEN ‘中年’
            ELSE ‘老年’
        END
    备注:CASE END 也可以作为一个变量名
    
    
    SELECT     a.age_level,COUNT(a,user_name)
    FROM 
    (SELECT DISTINCT user_name, 
    CASE
            WHEN age<20 THEN ‘青年’
            WHEN age BETWEEN 20 AND 60 THEN ‘中年’
            ELSE ‘老年’
        END AS age_level                          -->  学完Tableau之后,这个就太容易理解了
     FROM user_info WHERE year(firstactivetime)=2017) a
    JOIN
    (SELECT DISTINCT user_name,pay_amount FROM user_trade) b
    ON a.user_name=b.user_name 
    WHERE b.pay_amount is 0
    GROUP BY a.age_level;
    
    
    
    SELECT hour(firstactivetime),COUNT(DISTINCT a.user_name)
    FROM 
    (SELECT DISTINCT user_name FROM trade_2017 
    UNION ALL
    SELECT DISTINCT user_name FROM trade_2018) a
    LEFT JOIN
    (SELECT DISTINCT user_name,firstactivetime FROM user_info)b
    ON a.user_name=b.user_name 
    GROUP BY hour(firstactivetime);


    SELECT b.sex,count(b.user_name) FROM
    (SELECT user_name FROM user_trade WHERE year(dt)=2019
    UNION
    SELECT user_name FROM user_refund WHERE year(dt)=2019)a
    JOIN
    (SELECT distinct user_name,sex FROM user_info)b ON a.user_name=b.user_name
    GROUP BY b.sex;

    
    


    SELECT d.city,count(d.user_name)
    FROM
    (SELECT a.user_name FROM
    (SELECT distinct user_name FROM user_trade WHERE year(dt)=2018)a
    LEFT JOIN
    (SELECT distinct user_name,pay_amount FROM user_trade WHERE year(dt)=2019)b
    ON a.user_name=b.user_name WHERE b.pay_amount is null)c
    JOIN (SELECT distinct user_name,city FROM user_info)d
    ON c.user_name=d.user_name GROUP BY d.city;

    
    
    
    
    

    SELECT d.phone,count(d.user_name)
    FROM
    (SELECT a.user_name FROM
    (SELECT distinct user_name FROM trade_2017
    UNION ALL
    SELECT distinct user_name FROM trade_2018
    UNION ALL
    SELECT distinct user_name FROM trade_2019)a
    LEFT JOIN
    (SELECT distinct user_name,refund_amount FROM user_refund WHERE dt>'0')b
    ON a.user_name=b.user_name where b.refund_amount is null)c
    JOIN
    (SELECT distinct user_name,extra2['phonebrand'] as phone FROM user_info)d
    on c.user_name=d.user_name
    GROUP BY d.phone;

     
  • 相关阅读:
    网络基础
    SQL注入
    OpenID说明
    Linux网络编程
    Linux的僵尸进程产生原因及解决方法
    计算机系统的存储层次
    Java实现SSO
    JD(转载)
    Switch的表达式的要求
    leetcode(23. Merge k Sorted Lists)
  • 原文地址:https://www.cnblogs.com/654321cc/p/12175562.html
Copyright © 2020-2023  润新知