MYSQL:
1. 每一个逗号, 相当于一个中间表, 而且hive的groupby 更为严格 ;
-> 计算占比
SELECT branch_company, ROUND(SUM(pass_flag) / (SELECT SUM(pass_flag) FROM wr_test),2) rate FROM wr_test GROUP BY branch_company
-> 计算排名
SELECT FIELD, t1.field2, (SELECT DISTINCT(COUNT(field2)) FROM `test` t2 WHERE t2.field2 >= t1.field2 ) rank FROM `test` t1
Hive:
-> 不能在逗号后面直接使用() 而必须使用left join 出总值:
select round( t.s / t2.total , 3) , "" order_city_name, "" dealer_shortname , t.* from
(
select branch_company, time_date, time_unit, sum(pass_flag) s from bi_middle.wr_middle_1
group by branch_company, time_date, time_unit
) t
left join
(select sum(pass_flag) total from bi_middle.wr_middle_1) t2
-> 使用grouping sets 代替多个union;
select t1.*,t1.pass_flag/t2.total as ratio from (
select nvl(branch_company,'全国') branch_company,order_city_name,dealer_shortname,a.time_date,a.time_unit,sum(pass_flag) as pass_flag from bi_middle.wr_middle_1 a
group by branch_company,order_city_name,dealer_shortname,a.time_date,a.time_unit
grouping sets((a.time_date,a.time_unit),(branch_company,a.time_date,a.time_unit),(branch_company,order_city_name,a.time_date,a.time_unit),(branch_company,order_city_name,dealer_shortname,a.time_date,a.time_unit))
) t1
left join (
select sum(pass_flag) as total from bi_middle.wr_middle_1
) t2
# 注意事项:
-> Spark-SQL使用JDBC读取表, 默认使用一个executor去读, 如果转成并发, 需要设置分区索引去读;
1. 创建parquet存储表
CREATE TABLE IF NOT EXISTS bi_orginal.`param_code_dcs` (
`CODE_ID` string COMMENT 'CODE ID',
`TYPE` string COMMENT 'CODE类型',
`TYPE_NAME` string COMMENT 'CODE类型中文说明',
`CODE_DESC` string COMMENT 'CODE ID中文说明',
`NUM` string ,
`update_time` string
)
comment 'param_code_dcs'
STORED AS parquet
2. 向表中插入数据
insert overwrite table bi_orginal.`param_code_dcs`
SELECT CODE_ID, TYPE, TYPE_NAME, CODE_DESC, NUM, from_unixtime(unix_timestamp(),'yyyy-MM-dd HH:mm:ss') FROM artemisdcs.d_f_artemisdcs_tc_code_dcs
3. Hive中的union
SELECT * from (
SELECT sso_id from bi_orginal.som_so_user
UNION ALL
SELECT sso_id from bi_orginal.som_so_user
)t LIMIT 5
4. Hive删表:
truncate table table_name 先删除表中数据
drop table table_name 后删除HDFS上数据表