2016-02-02
目录
一、评级函数
1.RANK()和DENSE_RANK()函数
2.CUME_DIST()和PERCENT_RANK()函数
3.NTILE()函数
4.ROW_NUMBER()函数
5.PERCENILE_DISC(x)和PERCENTILE_CONT(x)反百分位函数
二、窗口函数
1.计算累积和
2.计算移动平均值
3.计算中心平均值
4.用FIRST_VALUE()和LAST_VALUE()获取第一行和最后一行
5.用NTH_VALUE()函数获取第n行
三、报表函数
1.总计报表
2.RATIO_TO_REPORT()函数
3.LISTAGG()函数
4.LAG()和LEAD()函数
5.FIRST()和LAST()函数
四、线性回归函数
五、MODEL子句
1.位置标记访问数据单元
2.符号标记访问数据单元
3.用between和and返回特定范围的数据单元
4.用any和is any 访问所有数据单元
5.用currentv()函数获取某个维度当前值
6.用for循环访问数据单元
7.使用is present
8.使用presentv()函数
9.使用presentnnv()函数
10.使用ignore nav和keep nav
11.使用rules udpate更新已有单元
六、PIVOT和UNPIVOT子句
1.pivot简单示例
2.转换多列
3.使用多个聚合函数
4.使用UNPIVOT子句
一、评级函数
1.RANK()和DENSE_RANK()函数
--返回数据项在分组中的排名 select * from all_sales where rownum <= 12; select prd_type_id, sum(amount), rank() over(order by sum(amount) desc) as rank, dense_rank() over(order by sum(amount) desc) as dense_rank from all_sales where year = 2003 and amount is not NULL group by prd_type_id order by prd_type_id; select prd_type_id, sum(amount), rank() over(order by sum(amount) desc) as rank, dense_rank() over(order by sum(amount) desc) as dense_rank from all_sales where year = 2003 /* and amount is not NULL*/ group by prd_type_id order by prd_type_id; --NULLS FIRST和NULLS LAST子句控制空值排名 select prd_type_id, sum(amount), rank() over(order by sum(amount) desc nulls last) as rank, dense_rank() over(order by sum(amount) desc nulls last) as dense_rank from all_sales where year = 2003 /* and amount is not NULL*/ group by prd_type_id order by prd_type_id; --RANK()和PARTITION BY select prd_type_id, month, sum(amount), rank() over(partition by month order by sum(amount) desc) as rank from all_sales where year = 2003 and amount is not NULL group by prd_type_id, month order by prd_type_id, month; --RANK()和ROLLUP() select prd_type_id, sum(amount), rank() over(order by sum(amount) desc) as rank from all_sales where year = 2003 group by rollup(prd_type_id) order by prd_type_id; --RANK()和CUBE() select prd_type_id, emp_id, sum(amount), rank() over(order by sum(amount) desc) as rank from all_sales where year = 2003 group by cube(prd_type_id, emp_id) order by prd_type_id, emp_id; --RANK()和GROUPTING SETS select prd_type_id, emp_id, sum(amount), rank() over(order by sum(amount) desc) as rank from all_sales where year = 2003 group by cube(prd_type_id, emp_id) order by prd_type_id, emp_id;
2.CUME_DIST()和PERCENT_RANK()函数
--CUME_DIST()计算某个特定值在一组值中的累积分布 --PERCENT_RANK计算某个值相对一组值的百分比排名 select prd_type_id, sum(amount), cume_dist() over(order by sum(amount) desc) as cume_dist, percent_rank() over(order by sum(amount) desc) as percent_rank from all_sales where year = 2003 group by prd_type_id, emp_id order by prd_type_id, emp_id;
3.NTILE()函数
--计算n分片 select prd_type_id, sum(amount), ntile(4) over(order by sum(amount) desc) as ntile from all_sales where year = 2003 and amount is not null group by prd_type_id order by prd_type_id;
4.ROW_NUMBER()函数
--从1开始,为分组中的每行返回一个数字 select prd_type_id, sum(amount), row_number() over(order by sum(amount) desc) as row_number from all_sales where year = 2003 and amount is not null group by prd_type_id order by prd_type_id;
5.PERCENILE_DISC(x)和PERCENTILE_CONT(x)反百分位函数
select percentile_cont(0.6) within group(order by sum(amount) desc) as percentile_cont, percentile_disc(0.6) within group(order by sum(amount) desc) as percentile_disc from all_sales where year = 2003 group by prd_type_id;
二、窗口函数
1.计算累积和
select month, sum(amount) as month_amount, sum(sum(amount)) over(order by month rows between unbounded preceding and current row) as cumulative_amount from all_sales where year = 2003 group by month order by month; select month, sum(amount) as month_amount, sum(sum(amount)) over(order by month rows between unbounded preceding and current row) as cumulative_amount from all_sales where year = 2003 and month between 6 and 12 group by month order by month;
2.计算移动平均值
select month, sum(amount) as month_amount, avg(sum(amount)) over(order by month rows between 3 preceding and current row) as moving_average from all_sales where year = 2003 group by month order by month;
3.计算中心平均值
select month, sum(amount) as month_amount, avg(sum(amount)) over(order by month rows between 1 preceding and 1 following) as moving_average from all_sales where year = 2003 group by month order by month;
4.用FIRST_VALUE()和LAST_VALUE()获取第一行和最后一行
select month, sum(amount) as month_amount, first_value(sum(amount)) over(order by month rows between 1 preceding and 1 following) as previous_month_amount, last_value(sum(amount)) over(order by month rows between 1 preceding and 1 following) as next_month_amount from all_sales where year = 2003 group by month order by month;
5.用NTH_VALUE()函数获取第n行
select month, sum(amount) as month_amount, nth_value(sum(amount), 2) over(order by month rows between unbounded preceding and unbounded following) as nth_value from all_sales where year = 2003 group by month order by month; select prd_type_id, emp_id, max(amount), nth_value(max(amount), 4) over(partition by prd_type_id order by emp_id rows between unbounded preceding and unbounded following) as nth_value from all_sales where prd_type_id between 1 and 3 group by prd_type_id, emp_id order by prd_type_id, emp_id;
三、报表函数
1.总计报表
select month, prd_type_id, sum(sum(amount)) over(partition by month) as total_month, sum(sum(amount)) over(partition by prd_type_id) as total_product_type_amount from all_sales where year = 2003 and month <= 3 group by month, prd_type_id order by month, prd_type_id;
2.RATIO_TO_REPORT()函数
--计算某个值在一组值的总和中所占的比率 select month, prd_type_id, sum(amount) as prd_type_amount, ratio_to_report(sum(amount)) over(partition by month) as prd_type_ratio from all_sales where year = 2003 and month <= 3 group by month, prd_type_id order by month, prd_type_id;
3.LISTAGG()函数
--对分组内的各行排序并将分组的值集连接起来 select * from products order by product_id; select listagg(name, ', ') within group(order by price, name) as "Product List", max(price) as "Most Expensive" from products where product_id <= 5; select product_id, product_type_id, name, listagg(name, ', ') within group(order by name) over(partition by product_type_id) as "Product List" from products where product_id <= 5 order by product_id, product_type_id;
4.LAG()和LEAD()函数
--获得位于距当前行指定距离的那一行数据 select month, sum(amount) as month_amount, lag(sum(amount), 1) over(order by month) as previous_month_amount, lead(sum(amount), 1) over(order by month) as next_month_amount from all_sales where year = 2003 group by month order by month;
5.FIRST()和LAST()函数
--获取排序分组中第一个值和最后一个值 select min(month) keep (dense_rank first order by sum(amount)) as highest_sales_month, min(month) keep (dense_rank last order by sum(amount)) as lowest_sales_month from all_sales where year = 2003 group by month order by month;
四、线性回归函数
select prd_type_id, regr_avgx(amount, month) as avgx, regr_avgy(amount, month) as avgy, regr_count(amount, month) as count, regr_intercept(amount, month) as inter, regr_r2(amount, month) as r2, regr_slope(amount, month) as slope, regr_sxx(amount, month) as sxx, regr_sxy(amount, month) as sxy, regr_syy(amount, month) as syy from all_sales where year = 2003 group by prd_type_id order by prd_type_id; select prd_type_id, sum(amount), rank() over(order by sum(amount) desc) as rank, percent_rank() over(order by sum(amount) desc) as percent_rank from all_sales where year = 2003 and amount is not null group by prd_type_id order by prd_type_id; select rank(500000) within group(order by sum(amount) desc) as rank, percent_rank(500000) within group(order by sum(amount) desc) as percent_rank from all_sales where year = 2003 and amount is not null group by prd_type_id order by prd_type_id;
五、MODEL子句
1.位置标记访问数据单元
select prd_type_id, year, month, sales_amount from all_sales where prd_type_id between 1 and 2 and emp_id = 21 model partition by(prd_type_id) dimension by(month, year) measures(amount sales_amount) (sales_amount [ 1, 2004 ] = sales_amount [ 1, 2003 ], sales_amount [ 2, 2004 ] = sales_amount [ 2, 2003 ] + sales_amount [ 3, 2003 ], sales_amount [ 3, 2004 ] = round(sales_amount [ 3, 2003 ] * 1.25, 2)) order by prd_type_id, year, month;
2.符号标记访问数据单元
select prd_type_id, year, month, sales_amount from all_sales where prd_type_id between 1 and 2 and emp_id = 21 model partition by(prd_type_id) dimension by(month, year) measures(amount sales_amount) (sales_amount [ month = 1, year = 2004 ] = sales_amount [ month = 1,year = 2003 ], sales_amount [ month = 2, year = 2004] = sales_amount [ month = 2, year = 2003 ] + sales_amount [month = 3, year = 2003 ], sales_amount [ month = 3, year = 2004] = round(sales_amount [ month = 3, year = 2003 ] * 1.25, 2)) order by prd_type_id, year, month;
3.用between和and返回特定范围的数据单元
select prd_type_id, year, month, sales_amount from all_sales where prd_type_id between 1 and 2 and emp_id = 21 model partition by(prd_type_id) dimension by(month, year) measures(amount sales_amount) (sales_amount[ 1, 2004] = round(avg(sales_amount) [ month between 1 and 3 ,2003], 2)) order by prd_type_id, year, month;
4.用any和is any 访问所有数据单元
select prd_type_id, year, month, sales_amount from all_sales where prd_type_id between 1 and 2 and emp_id = 21 model partition by(prd_type_id) dimension by(month, year) measures(amount sales_amount) (sales_amount[ 1, 2004] = round(sum(sales_amount) [ any, year is any], 2)) order by prd_type_id, year, month;
5.用currentv()函数获取某个维度当前值
select prd_type_id, year, month, sales_amount from all_sales where prd_type_id between 1 and 2 and emp_id = 21 model partition by(prd_type_id) dimension by(month, year) measures(amount sales_amount) (sales_amount[ 1, 2004] = round(sales_amount[ currentv(),2003] * 1.25, 2)) order by prd_type_id, year, month;
6.用for循环访问数据单元
select prd_type_id, year, month, sales_amount from all_sales where prd_type_id between 1 and 2 and emp_id = 21 model partition by(prd_type_id) dimension by(month, year) measures(amount sales_amount) (sales_amount[ for month from 1 to 3 increment 1,2004] = round(sales_amount[currentv(),2003] * 1.25,2)) order by prd_type_id, year, month;
7.使用is present
--如果cell指定的行在model子句执行之前存在,那么is present返回true select prd_type_id, year, month, sales_amount from all_sales where prd_type_id between 1 and 2 and emp_id = 21 model partition by(prd_type_id) dimension by(month, year) measures(amount sales_amount) (sales_amount [ for month from 1 to 3 increment 1, 2004 ] = case when sales_amount [ currentv(), 2003 ] is present then round(sales_amount [ currentv(), 2003 ] * 1.25, 2) else 0 end) order by prd_type_id, year, month;
8.使用presentv()函数
--如果cell引用的行在model子句执行前存在,那么presentv(cell,expr1,expr2)返回表达式expr1;如果不存在,返回expr2 select prd_type_id, year, month, sales_amount from all_sales where prd_type_id between 1 and 2 and emp_id = 21 model partition by(prd_type_id) dimension by(month, year) measures(amount sales_amount) (sales_amount [ for month from 1 to 3 increment 1, 2004 ] = presentv(sales_amount[currentv(),2003],round(sales_amount[currentv(),2003] * 1.25,2),0) ) order by prd_type_id, year, month;
9.使用presentnnv()函数
--如果cell引用的行在model子句执行前存在,并且该单元值不为空,那么presentv(cell,expr1,expr2)返回表达式expr1;如果行不存在或单元值为空,返回expr2 select prd_type_id, year, month, sales_amount from all_sales where prd_type_id between 1 and 2 and emp_id = 21 model partition by(prd_type_id) dimension by(month, year) measures(amount sales_amount) (sales_amount [ for month from 1 to 3 increment 1, 2004 ] = presentnnv(sales_amount[currentv(),2003],round(sales_amount[currentv(),2003] * 1.25,2),0) ) order by prd_type_id, year, month;
10.使用ignore nav和keep nav
select prd_type_id, year, month, sales_amount from all_sales where prd_type_id between 1 and 2 and emp_id = 21 model ignore nav partition by(prd_type_id) dimension by(month, year) measures(amount sales_amount) (sales_amount [ for month from 1 to 3 increment 1, 2004 ] = round(sales_amount[currentv(),2003] * 1.25,2) ) order by prd_type_id, year, month;
11.使用rules udpate更新已有单元
--在单元不存在的情况下不创建新行 select prd_type_id, year, month, sales_amount from all_sales where prd_type_id between 1 and 2 and emp_id = 21 model partition by(prd_type_id) dimension by(month, year) measures(amount sales_amount) rules update (sales_amount [ for month from 1 to 3 increment 1, 2004 ] = round(sales_amount[currentv(),2003] * 1.25,2) ) order by prd_type_id, year, month;
六、PIVOT和UNPIVOT子句
1.pivot简单示例
select * from (select month, prd_type_id, amount from all_sales where year = 2003 and prd_type_id in (1, 2, 3)); select * from (select month, prd_type_id, amount from all_sales where year = 2003 and prd_type_id in (1, 2, 3)) pivot(sum(amount) for month in(1 as jan, 2 as feb, 3 as mar, 4 as apr)) order by prd_type_id;
2.转换多列
SELECT * FROM (SELECT MONTH, prd_type_id, amount FROM all_sales WHERE YEAR = 2003 AND prd_type_id IN (1, 2, 3)) pivot ( SUM(amount) FOR (MONTH, prd_type_id) IN ( (1, 2) AS jan_prdtype2, (2, 3) AS feb_prdtype3, (3, 1) AS mar_prdtype1, (4, 2) AS apr_prdtype2 ) ) ;
3.使用多个聚合函数
SELECT * FROM (SELECT MONTH, prd_type_id, amount FROM all_sales WHERE YEAR = 2003 AND prd_type_id IN (1, 2, 3) ) PIVOT ( SUM(amount) AS sum_amount, AVG(amount) AS avg_amount FOR (month) IN (1 AS JAN,2 AS FEB ) ) ORDER BY prd_type_id ;
4.使用UNPIVOT子句
SELECT * FROM pivot_sales_data UNPIVOT ( amount FOR month IN (JAN,FEB,MAR,APR) ) ORDER BY prd_type_id;
【参考资料】
[1] Jason Price.精通Oracle Database 12c SQL&PLSQL编程(第3版).[M].北京:清华大学出版社,2014