• 2、函数


    时间函数

    当前日期

    select current_date();
     2018-11-14      

    当前时间戳

    select current_timestamp();
    2018-11-14 21:35:16.237  

    date_format()

    select date_format(current_date(),'yyyyMMdd');
    20181114 select date_format(current_timestamp(),'yyyyMMdd');
    20181114                    
    

    unix_timestamp()

    select unix_timestamp();
    +-----------------------------------------------------------+--+
    | unix_timestamp(current_timestamp(), yyyy-MM-dd HH:mm:ss)  |
    +-----------------------------------------------------------+--+
    | 1542202845                                                |
    +-----------------------------------------------------------+--+

    from_unixtime()

    select from_unixtime(unix_timestamp(),'yyyyMMdd HH:mm:ss');
    +---------------------------------------------------------------------------------------------
    | from_unixtime(unix_timestamp(current_timestamp(), yyyy-MM-dd HH:mm:ss), yyyyMMdd HH:mm:ss)  
    +---------------------------------------------------------------------------------------------
    | 20181114 21:43:14

    日期差值

    0: jdbc:hive2://s101:10000/lx> select datediff('2019-08-02','2019-08-06');
    +---------------------------------------------------------------+--+
    | datediff(CAST(2019-08-02 AS DATE), CAST(2019-08-06 AS DATE))  |
    +---------------------------------------------------------------+--+
    | -4                                                            |
    +---------------------------------------------------------------+--+

    字符串函数

    split

    select explode(split('hello',''));

    substr

    select substr('hello',1,3);

    trim去除前后空格

    select trim(' hello ');

    format_number

    select format_number(1234.345,1);

    concat

    length

    条件语句

    if

    select if(w1 >2 ,w1,w2) from www;
    true则w1,false则w2

    case when then

    select case when w1 > 2 then w1 when w1 <= 2 then w2 end from www;

    窗口函数

    lead

    lead(input[, offset[, default]]) 上提
        input:     上提列
        offset: 上提行数,可选,默认是 1default:填充值,可选,默认是null
        使用:select id,name,lead(id,2,'qq')over(partition by id order by id) lad from www;
        注意:2可选,不写默认1
              ‘qq’可选,不写默认null,类型需要对应,此处id是int,所以还是为null
              partition by id:可选,分组后对每个组进行lead
              order by id:必须写

    lag

    lag(input[, offset[, default]]) 下拉
        input:     下拉列
        offset: 下拉行数,可选,默认是 1default:填充值,可选,默认是null
        使用:select id,name,lag(id,2,11)over(partition by id order by id) lag from www;
        注意:2可选,不写默认1
              11可选,不写默认null
              partition by id:可选,分组后对每个组进行lag
              order by id:必须写

    first_value

    first_value(expr[, isIgnoreNull])
        expr:列名或一个表达式
        isIgnoreNull:true或false,如果是true将跳过null值,可选,默认false
        select id,name,first_value(concat(cast(id as string), name),true)over(partition by name order by id) lag from www;

    last_value

    select id,name,last_value(concat(cast(id as string), name),true)over(order by id ) lag from www;
    +-----+-------+--------+--+
    | id  | name  |  lag   |
    +-----+-------+--------+--+
    | 1   | a     | 1a     |
    | 2   | b     | 2b     |
    | 3   | c     | 3c     |
    | 4   | c     | 4c     |
    | 5   | c     | 5c     |
    | 6   | d     | 6d     |
    | 7   | b     | 7b     |
    | 8   | a     | 8a     |
    | 9   | a     | 9a     |
    | 12  | eee   | 12eee  |
    +-----+-------+--------+--+select id,name from www;
    +-----+-------+--+
    | id  | name  |
    +-----+-------+--+
    | 12  | eee   |
    | 1   | a     |
    | 2   | b     |
    | 3   | c     |
    | 4   | c     |
    | 5   | c     |
    | 6   | d     |
    | 7   | b     |
    | 8   | a     |
    | 9   | a     |
    +-----+-------+--+
    select id,name,last_value(concat(cast(id as string), name),true)over() lag from www;
    +-----+-------+------+--+
    | id  | name  | lag  |
    +-----+-------+------+--+
    | 12  | eee   | 6d   |
    | 7   | b     | 6d   |
    | 8   | a     | 6d   |
    | 9   | a     | 6d   |
    | 1   | a     | 6d   |
    | 2   | b     | 6d   |
    | 3   | c     | 6d   |
    | 4   | c     | 6d   |
    | 5   | c     | 6d   |
    | 6   | d     | 6d   |
    +-----+-------+------+--+

    over和标准聚合函数

    select distinct name,count(name)over(partition by name) s from www;
    求分区个数并去重
    select distinct name,sum(id)over(partition by name) s from www;
    分区id和并去重
    select id,name,max(length(name))over() from www;
    总体的最大长度
    select name,min(id)over(partition by name) s from www;
    每个分区最小id
    select name,max(id)over(partition by name) s from www;
    每个分区最大id
    select name,avg(id)over(partition by name) s from www;
    每个分区平均id

    over和partiton by

    over和partition by order by

    select first_value(id)over(partition by id,name) from www;
    select first_value(id)over(partition by id,name order by id,name) from www;

    以行限定窗口范围

    select * ,sum(grade)over(order by grade desc rows between current row and 1 following) from sg;
    select * ,sum(grade)over(order by grade desc rows between current row and unbounded following) from sg;
    select * ,sum(grade)over(order by grade desc rows between unbounded preceding and current row) from sg;
    select * ,sum(grade)over(order by grade desc rows between unbounded preceding and unbounded following) from sg;

    以值限定窗口范围

    select * ,sum(grade)over(order by grade desc range between unbounded preceding and current row) from sg;

    排名函数

    rank():并列跳跃

    dense_rank():并列连续,不跳跃

    row_number():连续

    Assigns a unique,sequential number to each row, 
    starting with one,
    according to the ordering of rows within the window partition
    //商家内用户访问次数倒序排列,取前三个
    select
    * from ( select * ,row_number()over(partition by id order by count desc)b from ( select id,uu,count(*) as count from shangjia where uu is not null group by id,uu)a)c where b <= 3;

    cume_dist()

    select cume_dist()over(order by mid ) from t1;
    //小于等于当前值的行数/分组内总行数    
    select cume_dist()over(order by mid desc) from t1;
    //大于等于当前值的行数/分组内总行数        

    percent_rank

    ntile(n):每个分区按一定顺序分成n份

    高级聚合函数

    grouping sets

    select *,count(1),grouping_id() from pv group by t1,t2,s3 grouping sets(t1,t2,s3);
    grouping__id    //分组的组号(可选)
    grouping sets(t1,t2,s3,()) //相当于分别对t1,t2,s3,null进行分组并用union all连接

    group by   ... with cube

    select *,count(1),grouping_id() from pv group by t1,t2,s3 with cube order by grouping_id();
    八种

    rollup

    select *,count(1),grouping_id() from pv group by t1,t2,s3 with rollup order by grouping_id();
    null、t1、t1和t2、t1和t2和t3

    排序函数

    order by age  

    全排序,一个reduce;需要加limit,在map阶段在每个分区中取出前n个元素,交给r处理

    sort by age

    部分排序

    distribute by age

    哈希分区

    cluster by

    distribute by + sort by

    select age from user_order distribute by age sort by age;
    渐变 --> 突变
  • 相关阅读:
    JVM-堆内存
    生产者消费者模式-基于线程池
    nginx 499错误
    thrift入门
    RPC-基于原生java实现
    rocketMQ入门
    跟着刚哥深入学maven(通俗易懂)
    跟着刚哥学习Spring框架--AOP(五)
    跟着刚哥学习Spring框架--通过注解方式配置Bean(四)
    跟着刚哥学习Spring框架--Spring容器(二)
  • 原文地址:https://www.cnblogs.com/lybpy/p/9961049.html
Copyright © 2020-2023  润新知