• oracle 开发 第08章 分析数据


    2016-02-02

    目录

    一、评级函数
      1.RANK()和DENSE_RANK()函数
      2.CUME_DIST()和PERCENT_RANK()函数
      3.NTILE()函数
      4.ROW_NUMBER()函数
      5.PERCENILE_DISC(x)和PERCENTILE_CONT(x)反百分位函数
    二、窗口函数
      1.计算累积和
      2.计算移动平均值
      3.计算中心平均值
      4.用FIRST_VALUE()和LAST_VALUE()获取第一行和最后一行
      5.用NTH_VALUE()函数获取第n行
    三、报表函数
      1.总计报表
      2.RATIO_TO_REPORT()函数
      3.LISTAGG()函数
      4.LAG()和LEAD()函数
      5.FIRST()和LAST()函数
    四、线性回归函数
    五、MODEL子句
      1.位置标记访问数据单元
      2.符号标记访问数据单元
      3.用between和and返回特定范围的数据单元
      4.用any和is any 访问所有数据单元
      5.用currentv()函数获取某个维度当前值
      6.用for循环访问数据单元
      7.使用is present
      8.使用presentv()函数
      9.使用presentnnv()函数
      10.使用ignore nav和keep nav
      11.使用rules udpate更新已有单元
    六、PIVOT和UNPIVOT子句
      1.pivot简单示例
      2.转换多列
      3.使用多个聚合函数
      4.使用UNPIVOT子句

    一、评级函数
    1.RANK()和DENSE_RANK()函数

    --返回数据项在分组中的排名
    select * from all_sales where rownum <= 12;
    
    select prd_type_id,
           sum(amount),
           rank() over(order by sum(amount) desc) as rank,
           dense_rank() over(order by sum(amount) desc) as dense_rank
      from all_sales
     where year = 2003
       and amount is not NULL
     group by prd_type_id
     order by prd_type_id;
    
    select prd_type_id,
           sum(amount),
           rank() over(order by sum(amount) desc) as rank,
           dense_rank() over(order by sum(amount) desc) as dense_rank
      from all_sales
     where year = 2003
    /*   and amount is not NULL*/
     group by prd_type_id
     order by prd_type_id;
    
    --NULLS FIRST和NULLS LAST子句控制空值排名
    select prd_type_id,
           sum(amount),
           rank() over(order by sum(amount) desc nulls last) as rank,
           dense_rank() over(order by sum(amount) desc nulls last) as dense_rank
      from all_sales
     where year = 2003
    /*   and amount is not NULL*/
     group by prd_type_id
     order by prd_type_id;
    
    --RANK()和PARTITION BY
    select prd_type_id,
           month,
           sum(amount),
           rank() over(partition by month order by sum(amount) desc) as rank
      from all_sales
     where year = 2003
       and amount is not NULL
     group by prd_type_id, month
     order by prd_type_id, month;
    
    --RANK()和ROLLUP()
    select prd_type_id,
           sum(amount),
           rank() over(order by sum(amount) desc) as rank
      from all_sales
     where year = 2003
     group by rollup(prd_type_id)
     order by prd_type_id;
    --RANK()和CUBE()
    select prd_type_id,
           emp_id,
           sum(amount),
           rank() over(order by sum(amount) desc) as rank
      from all_sales
     where year = 2003
     group by cube(prd_type_id, emp_id)
     order by prd_type_id, emp_id;
     --RANK()和GROUPTING SETS
     select prd_type_id,
           emp_id,
           sum(amount),
           rank() over(order by sum(amount) desc) as rank
      from all_sales
     where year = 2003
     group by cube(prd_type_id, emp_id)
     order by prd_type_id, emp_id;

    2.CUME_DIST()和PERCENT_RANK()函数

    --CUME_DIST()计算某个特定值在一组值中的累积分布
    --PERCENT_RANK计算某个值相对一组值的百分比排名
    select prd_type_id,
           sum(amount),
           cume_dist() over(order by sum(amount) desc) as cume_dist,
           percent_rank() over(order by sum(amount) desc) as percent_rank
      from all_sales
     where year = 2003
     group by prd_type_id, emp_id
     order by prd_type_id, emp_id;

    3.NTILE()函数

    --计算n分片
    select prd_type_id,
           sum(amount),
           ntile(4) over(order by sum(amount) desc) as ntile
      from all_sales
     where year = 2003
       and amount is not null
     group by prd_type_id
     order by prd_type_id;

    4.ROW_NUMBER()函数

    --从1开始,为分组中的每行返回一个数字
    select prd_type_id,
           sum(amount),
           row_number() over(order by sum(amount) desc) as row_number
      from all_sales
     where year = 2003
       and amount is not null
     group by prd_type_id
     order by prd_type_id;

    5.PERCENILE_DISC(x)和PERCENTILE_CONT(x)反百分位函数

    select percentile_cont(0.6) within group(order by sum(amount) desc) as percentile_cont,
           percentile_disc(0.6) within group(order by sum(amount) desc) as percentile_disc
      from all_sales
     where year = 2003
     group by prd_type_id;

    二、窗口函数
    1.计算累积和

    select month,
           sum(amount) as month_amount,
           sum(sum(amount)) over(order by month rows between unbounded preceding and current row) as cumulative_amount
      from all_sales
     where year = 2003
     group by month
     order by month;
    
    select month,
           sum(amount) as month_amount,
           sum(sum(amount)) over(order by month rows between unbounded preceding and current row) as cumulative_amount
      from all_sales
     where year = 2003
     and month between 6 and 12
     group by month
     order by month;

    2.计算移动平均值

    select month,
           sum(amount) as month_amount,
           avg(sum(amount)) over(order by month rows between 3 preceding and current row) as moving_average
      from all_sales
     where year = 2003
     group by month
     order by month;

    3.计算中心平均值

    select month,
           sum(amount) as month_amount,
           avg(sum(amount)) over(order by month rows between 1 preceding and 1 following) as moving_average
      from all_sales
     where year = 2003
     group by month
     order by month;

    4.用FIRST_VALUE()和LAST_VALUE()获取第一行和最后一行

    select month,
           sum(amount) as month_amount,
           first_value(sum(amount)) over(order by month rows between 1 preceding and 1 following) as previous_month_amount,
           last_value(sum(amount)) over(order by month rows between 1 preceding and 1 following) as next_month_amount
      from all_sales
     where year = 2003
     group by month
     order by month;

    5.用NTH_VALUE()函数获取第n行

    select month,
           sum(amount) as month_amount,
           nth_value(sum(amount), 2) over(order by month rows between unbounded preceding and unbounded following) as nth_value
      from all_sales
     where year = 2003
     group by month
     order by month;
    
    select prd_type_id,
           emp_id,
           max(amount),
           nth_value(max(amount), 4) over(partition by prd_type_id order by emp_id rows between unbounded preceding and unbounded following) as nth_value
      from all_sales
     where prd_type_id between 1 and 3
     group by prd_type_id, emp_id
     order by prd_type_id, emp_id;

    三、报表函数
    1.总计报表

    select month,
           prd_type_id,
           sum(sum(amount)) over(partition by month) as total_month,
           sum(sum(amount)) over(partition by prd_type_id) as total_product_type_amount
      from all_sales
     where year = 2003
       and month <= 3
     group by month, prd_type_id
     order by month, prd_type_id;

    2.RATIO_TO_REPORT()函数

    --计算某个值在一组值的总和中所占的比率
    select month,
           prd_type_id,
           sum(amount) as prd_type_amount,
           ratio_to_report(sum(amount)) over(partition by month) as prd_type_ratio
      from all_sales
     where year = 2003
       and month <= 3
     group by month, prd_type_id
     order by month, prd_type_id;

    3.LISTAGG()函数

    --对分组内的各行排序并将分组的值集连接起来
    select * from products order by product_id;
    
    select listagg(name, ', ') within group(order by price, name) as "Product List",
           max(price) as "Most Expensive"
      from products
     where product_id <= 5;
    
    select product_id,
           product_type_id,
           name,
           listagg(name, ', ') within group(order by name) over(partition by product_type_id) as "Product List"
      from products
     where product_id <= 5
     order by product_id, product_type_id;

    4.LAG()和LEAD()函数

    --获得位于距当前行指定距离的那一行数据
    select month,
           sum(amount) as month_amount,
           lag(sum(amount), 1) over(order by month) as previous_month_amount,
           lead(sum(amount), 1) over(order by month) as next_month_amount
      from all_sales
     where year = 2003
     group by month
     order by month;

    5.FIRST()和LAST()函数

    --获取排序分组中第一个值和最后一个值
    select min(month) keep (dense_rank first order by sum(amount)) as highest_sales_month,
           min(month) keep (dense_rank last order by sum(amount)) as lowest_sales_month
      from all_sales
     where year = 2003
     group by month
     order by month;

    四、线性回归函数

    select prd_type_id,
           regr_avgx(amount, month) as avgx,
           regr_avgy(amount, month) as avgy,
           regr_count(amount, month) as count,
           regr_intercept(amount, month) as inter,
           regr_r2(amount, month) as r2,
           regr_slope(amount, month) as slope,
           regr_sxx(amount, month) as sxx,
           regr_sxy(amount, month) as sxy,
           regr_syy(amount, month) as syy
      from all_sales
     where year = 2003
     group by prd_type_id
     order by prd_type_id;
    
    select prd_type_id,
           sum(amount),
           rank() over(order by sum(amount) desc) as rank,
           percent_rank() over(order by sum(amount) desc) as percent_rank       
      from all_sales
     where year = 2003
       and amount is not null
     group by prd_type_id
     order by prd_type_id;
    
    select rank(500000) within group(order by sum(amount) desc) as rank,
           percent_rank(500000) within group(order by sum(amount) desc) as percent_rank
      from all_sales
     where year = 2003
       and amount is not null
     group by prd_type_id
     order by prd_type_id;

    五、MODEL子句
    1.位置标记访问数据单元

    select prd_type_id, year, month, sales_amount
      from all_sales
     where prd_type_id between 1 and 2
       and emp_id = 21 
       model 
       partition by(prd_type_id) 
       dimension by(month, year) 
       measures(amount sales_amount)
     (sales_amount [ 1, 2004 ] = sales_amount [ 1, 2003 ], 
      sales_amount [ 2, 2004 ] = sales_amount [ 2, 2003 ] + sales_amount [ 3, 2003 ],
      sales_amount [ 3, 2004 ] = round(sales_amount [ 3, 2003 ] * 1.25, 2))
     order by prd_type_id, year, month;

    2.符号标记访问数据单元

    select prd_type_id, year, month, sales_amount
      from all_sales
     where prd_type_id between 1 and 2
       and emp_id = 21 
       model 
       partition by(prd_type_id) 
       dimension by(month, year) 
       measures(amount sales_amount)
     (sales_amount [ month = 1, year = 2004 ] = sales_amount [ month = 1,year = 2003 ], 
     sales_amount [ month = 2, year = 2004] = sales_amount [ month = 2, year = 2003 ] + sales_amount [month = 3, year = 2003 ], 
     sales_amount [ month = 3, year = 2004] = round(sales_amount [ month = 3, year = 2003 ] * 1.25, 2))
     order by prd_type_id, year, month;

    3.用between和and返回特定范围的数据单元

    select prd_type_id, year, month, sales_amount
      from all_sales
     where prd_type_id between 1 and 2
       and emp_id = 21 
       model 
       partition by(prd_type_id) 
       dimension by(month, year) 
       measures(amount sales_amount)
     (sales_amount[ 1, 2004] = round(avg(sales_amount) [ month between 1 and 3 ,2003], 2))
     order by prd_type_id, year, month;

    4.用any和is any 访问所有数据单元

    select prd_type_id, year, month, sales_amount
      from all_sales
     where prd_type_id between 1 and 2
       and emp_id = 21 
       model 
       partition by(prd_type_id) 
       dimension by(month, year) 
       measures(amount sales_amount)
     (sales_amount[ 1, 2004] = round(sum(sales_amount) [ any, year is any], 2))
     order by prd_type_id, year, month;

    5.用currentv()函数获取某个维度当前值

    select prd_type_id, year, month, sales_amount
      from all_sales
     where prd_type_id between 1 and 2
       and emp_id = 21 
       model 
       partition by(prd_type_id) 
       dimension by(month, year) 
       measures(amount sales_amount)
     (sales_amount[ 1, 2004] = round(sales_amount[ currentv(),2003] * 1.25, 2))
     order by prd_type_id, year, month;

    6.用for循环访问数据单元

    select prd_type_id, year, month, sales_amount
      from all_sales
     where prd_type_id between 1 and 2
       and emp_id = 21 
       model 
       partition by(prd_type_id) 
       dimension by(month, year) 
       measures(amount sales_amount)
     (sales_amount[ for month from 1 to 3 increment 1,2004] = round(sales_amount[currentv(),2003] * 1.25,2))
     order by prd_type_id, year, month;

    7.使用is present

    --如果cell指定的行在model子句执行之前存在,那么is present返回true
    select prd_type_id, year, month, sales_amount
      from all_sales
     where prd_type_id between 1 and 2
       and emp_id = 21 
       model 
       partition by(prd_type_id) 
       dimension by(month, year) 
       measures(amount sales_amount)
     (sales_amount [ for month from 1 to 3 increment 1, 2004 ] = 
       case when sales_amount [ currentv(), 2003 ] is present 
         then
               round(sales_amount [ currentv(), 2003 ] * 1.25, 2)
          else
               0
          end)
     order by prd_type_id, year, month;

    8.使用presentv()函数

    --如果cell引用的行在model子句执行前存在,那么presentv(cell,expr1,expr2)返回表达式expr1;如果不存在,返回expr2
    select prd_type_id, year, month, sales_amount
      from all_sales
     where prd_type_id between 1 and 2
       and emp_id = 21 
       model 
       partition by(prd_type_id) 
       dimension by(month, year) 
       measures(amount sales_amount)
     (sales_amount [ for month from 1 to 3 increment 1, 2004 ] = 
       presentv(sales_amount[currentv(),2003],round(sales_amount[currentv(),2003] * 1.25,2),0)
       )
     order by prd_type_id, year, month;

    9.使用presentnnv()函数

    --如果cell引用的行在model子句执行前存在,并且该单元值不为空,那么presentv(cell,expr1,expr2)返回表达式expr1;如果行不存在或单元值为空,返回expr2
    select prd_type_id, year, month, sales_amount
      from all_sales
     where prd_type_id between 1 and 2
       and emp_id = 21 
       model 
       partition by(prd_type_id) 
       dimension by(month, year) 
       measures(amount sales_amount)
     (sales_amount [ for month from 1 to 3 increment 1, 2004 ] = 
       presentnnv(sales_amount[currentv(),2003],round(sales_amount[currentv(),2003] * 1.25,2),0)
       )
     order by prd_type_id, year, month;

    10.使用ignore nav和keep nav

    select prd_type_id, year, month, sales_amount
      from all_sales
     where prd_type_id between 1 and 2
       and emp_id = 21 
       model ignore nav
       partition by(prd_type_id) 
       dimension by(month, year) 
       measures(amount sales_amount)
     (sales_amount [ for month from 1 to 3 increment 1, 2004 ] = 
       round(sales_amount[currentv(),2003] * 1.25,2)
       )
     order by prd_type_id, year, month;

    11.使用rules udpate更新已有单元

    --在单元不存在的情况下不创建新行
    select prd_type_id, year, month, sales_amount
      from all_sales
     where prd_type_id between 1 and 2
       and emp_id = 21 
       model
       partition by(prd_type_id) 
       dimension by(month, year) 
       measures(amount sales_amount)
       rules update (sales_amount [ for month from 1 to 3 increment 1, 2004 ] = 
       round(sales_amount[currentv(),2003] * 1.25,2)
       )
     order by prd_type_id, year, month;

    六、PIVOT和UNPIVOT子句
    1.pivot简单示例

    select *
      from (select month, prd_type_id, amount
              from all_sales
             where year = 2003
               and prd_type_id in (1, 2, 3));
    
    select *
      from (select month, prd_type_id, amount
              from all_sales
             where year = 2003
               and prd_type_id in (1, 2, 3)) 
     pivot(sum(amount) for month in(1 as jan,
                                    2 as feb,
                                    3 as mar,
                                    4 as apr))
     order by prd_type_id;

    2.转换多列

    SELECT 
      * 
    FROM
      (SELECT 
        MONTH,
        prd_type_id,
        amount 
      FROM
        all_sales 
      WHERE YEAR = 2003 
        AND prd_type_id IN (1, 2, 3)) pivot (
        SUM(amount) FOR (MONTH, prd_type_id) IN (
          (1, 2) AS jan_prdtype2,
          (2, 3) AS feb_prdtype3,
          (3, 1) AS mar_prdtype1,
          (4, 2) AS apr_prdtype2
        )
      ) ;

    3.使用多个聚合函数

    SELECT 
      * 
    FROM
      (SELECT 
        MONTH,
        prd_type_id,
        amount 
      FROM
        all_sales 
      WHERE YEAR = 2003 
        AND prd_type_id IN (1, 2, 3)
        ) 
       PIVOT (
        SUM(amount) AS sum_amount,
        AVG(amount) AS avg_amount
        FOR (month) IN (1 AS JAN,2 AS FEB
        )
      )
      ORDER BY prd_type_id ;

    4.使用UNPIVOT子句

    SELECT *
    FROM pivot_sales_data
    UNPIVOT (
     amount FOR month IN (JAN,FEB,MAR,APR)
     )
     ORDER BY prd_type_id;

    【参考资料】

    [1] Jason Price.精通Oracle Database 12c SQL&PLSQL编程(第3版).[M].北京:清华大学出版社,2014

  • 相关阅读:
    无人值守安装linux
    数组中只出现过一次的数字 牛客网 剑指Offer
    数组中出现次数超过一半的数字 牛客网 剑指Offer
    数据流中的中位数 牛客网 剑指Offer
    数字在排序数组中出现的次数 牛客网 剑指Offer
    数值的整数次方 牛客网 剑指Offer
    按之字形顺序打印二叉树 牛客网 剑指Offer
    把数组排成最小的数 牛客网 剑指Offer
    把字符串转换成整数 牛客网 剑指Offer
    把二叉树打印成多行 牛客网 剑指Offer
  • 原文地址:https://www.cnblogs.com/cenliang/p/5124706.html
Copyright © 2020-2023  润新知