• hive 取两次记录的时间差 lead lag first_value last_value


    -- LEAD(col,n,DEFAULT) 用于统计窗口内往下第n行值
    -- 第一个参数为列名,第二个参数为往下第n行(可选,默认为1),第三个参数为默认值(当往下第n行为NULL时候,取默认值,如不指定,则为NULL)
    -- LAG(col,n,DEFAULT) 用于统计窗口内往上第n行值
    -- 第一个参数为列名,第二个参数为往上第n行(可选,默认为1),第三个参数为默认值(当往上第n行为NULL时候,取默认值,如不指定,则为NULL)
    -- FIRST_VALUE 取分组内排序后,截止到当前行,第一个值
    -- LAST_VALUE  取分组内排序后,截止到当前行,最后一个值
    -- 这几个函数不支持WINDOW子句
    
    select
         t2.id
        ,t2.day
        ,t2.lead_default_day
        ,t2.lead_2_day
        ,t2.lag_default_day
        ,t2.lag_2_day
        ,t2.first_day_1
        ,t2.first_day_2
        ,t2.last_day_1
        ,t2.last_day_2
        ,(unix_timestamp(t2.lead_default_day)-unix_timestamp(t2.day))/3600 as diff_hour
    from (
        select
             t1.id
            ,t1.day
            ,lead(t1.day) over(partition by t1.id order by t1.day)                         as lead_default_day
            ,lead(t1.day,1,'2018-01-01 00:00:00') over(partition by t1.id order by t1.day) as lead_2_day
            ,lag(t1.day) over(partition by t1.id order by t1.day)                          as lag_default_day
            ,lag(t1.day,1,'2018-01-01 00:00:00') over(partition by t1.id order by t1.day)  as lag_2_day
            ,first_value(t1.day) over(partition by t1.id order by t1.day)                  as first_day_1
            ,first_value(t1.day) over(partition by t1.id)                  as first_day_2
            ,last_value(t1.day) over(partition by t1.id order by t1.day)                   as last_day_1
            ,last_value(t1.day) over(partition by t1.id)                                   as last_day_2
        from (
            select 'a' as id, '2018-01-01 12:22:00' as day union all
            select 'a' as id, '2018-01-09 00:00:00' as day union all
            select 'a' as id, '2018-01-02 00:00:00' as day union all
            select 'a' as id, '2018-01-03 00:00:00' as day union all
            select 'a' as id, '2018-01-04 00:00:00' as day union all
            select 'b' as id, '2018-01-08 00:00:00' as day union all
            select 'b' as id, '2018-01-05 00:00:00' as day union all
            select 'b' as id, '2018-01-06 00:00:00' as day union all
            select 'b' as id, '2018-01-07 00:00:00' as day
        ) t1
    ) t2
    ;
    +-----+----------------------+----------------------+----------------------+----------------------+----------------------+----------------------+----------------------+----------------------+---------------------+--+
    | id  |         day          |   lead_default_day   |      lead_2_day      |      lag_2_day       |     first_day_1      |     first_day_2      |      last_day_1      |      last_day_2      |      diff_hour      |
    +-----+----------------------+----------------------+----------------------+----------------------+----------------------+----------------------+----------------------+----------------------+---------------------+--+
    | b   | 2018-01-05 00:00:00  | 2018-01-06 00:00:00  | 2018-01-06 00:00:00  | 2018-01-01 00:00:00  | 2018-01-05 00:00:00  | 2018-01-05 00:00:00  | 2018-01-05 00:00:00  | 2018-01-08 00:00:00  | 24.0                |
    | b   | 2018-01-06 00:00:00  | 2018-01-07 00:00:00  | 2018-01-07 00:00:00  | 2018-01-05 00:00:00  | 2018-01-05 00:00:00  | 2018-01-05 00:00:00  | 2018-01-06 00:00:00  | 2018-01-08 00:00:00  | 24.0                |
    | b   | 2018-01-07 00:00:00  | 2018-01-08 00:00:00  | 2018-01-08 00:00:00  | 2018-01-06 00:00:00  | 2018-01-05 00:00:00  | 2018-01-05 00:00:00  | 2018-01-07 00:00:00  | 2018-01-08 00:00:00  | 24.0                |
    | b   | 2018-01-08 00:00:00  | NULL                 | 2018-01-01 00:00:00  | 2018-01-07 00:00:00  | 2018-01-05 00:00:00  | 2018-01-05 00:00:00  | 2018-01-08 00:00:00  | 2018-01-08 00:00:00  | NULL                |
    | a   | 2018-01-01 12:22:00  | 2018-01-02 00:00:00  | 2018-01-02 00:00:00  | 2018-01-01 00:00:00  | 2018-01-01 12:22:00  | 2018-01-01 12:22:00  | 2018-01-01 12:22:00  | 2018-01-09 00:00:00  | 11.633333333333333  |
    | a   | 2018-01-02 00:00:00  | 2018-01-03 00:00:00  | 2018-01-03 00:00:00  | 2018-01-01 12:22:00  | 2018-01-01 12:22:00  | 2018-01-01 12:22:00  | 2018-01-02 00:00:00  | 2018-01-09 00:00:00  | 24.0                |
    | a   | 2018-01-03 00:00:00  | 2018-01-04 00:00:00  | 2018-01-04 00:00:00  | 2018-01-02 00:00:00  | 2018-01-01 12:22:00  | 2018-01-01 12:22:00  | 2018-01-03 00:00:00  | 2018-01-09 00:00:00  | 24.0                |
    | a   | 2018-01-04 00:00:00  | 2018-01-09 00:00:00  | 2018-01-09 00:00:00  | 2018-01-03 00:00:00  | 2018-01-01 12:22:00  | 2018-01-01 12:22:00  | 2018-01-04 00:00:00  | 2018-01-09 00:00:00  | 120.0               |
    | a   | 2018-01-09 00:00:00  | NULL                 | 2018-01-01 00:00:00  | 2018-01-04 00:00:00  | 2018-01-01 12:22:00  | 2018-01-01 12:22:00  | 2018-01-09 00:00:00  | 2018-01-09 00:00:00  | NULL                |
    +-----+----------------------+----------------------+----------------------+----------------------+----------------------+----------------------+----------------------+----------------------+---------------------+--+
  • 相关阅读:
    CentOS 6.x Radius
    Linux系统优化
    Linux 常用检测命令
    Linux 修改终端命令提示符颜色
    Linux LVM简明教程
    剑指Offer 通过中序和先序遍历重建二叉树
    剑指Offer 树的子结构
    剑指Offer 从上往下打印二叉树(dfs)
    剑指Offer 把字符串转换成整数
    剑指Offer 两个链表的第一个公共结点
  • 原文地址:https://www.cnblogs.com/chenzechao/p/9843383.html
Copyright © 2020-2023  润新知