需求:统计出掉线率最高的前10基站
数据:
record_time:通话时间
imei:基站编号
cell:手机编号
drop_num:掉话的秒数
duration:通话持续总秒数
测试数据:
https://download.csdn.net/download/arne_/11262104
数据格式:
--基础数据表
create table cell_base(
record_time string,
imei string,
cell string,
ph_num int,
call_num int,
drop_num int,
duration int,
drop_rate DOUBLE,
net_type string,
erl string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
--结果表
create table cell_drop(
imei string,
total_call_num int,
total_drop_num int,
d_rate DOUBLE
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ' '
STORED AS TEXTFILE;
--加载数据到cell_base
load data local inpath '/root/cdr_summ_imei_cell_info.csv' OVERWRITE INTO TABLE cell_monitor;
--找出掉线率最高的基站
from cell_base cm
insert overwrite table cell_drop
select cm.imei ,sum(cm.drop_num),sum(cm.duration),sum(cm.drop_num)/sum(cm.duration) d_rate
group by cm.imei
sort by d_rate desc;
--查看结果
select * from cell_drop limit 10;
+-----------------+------------------+--------------------+---------------------+--+
| imei | total_call_num | total_drop_num | d_rate
+-----------------+--------------------+-----------------+----------------------+--+
| 639876 | 1 | 734 | 0.0013623978201634877 |
| 356436 | 1 | 1028 | 9.727626459143969E-4 |
| 351760 | 1 | 1232 | 8.116883116883117E-4 |
| 368883 | 1 | 1448 | 6.906077348066298E-4 |
| 358849 | 1 | 1469 | 6.807351940095302E-4 |
| 358231 | 1 | 1613 | 6.199628022318661E-4 |
| 863738 | 2 | 3343 | 5.982650314089142E-4 |
| 865011 | 1 | 1864 | 5.36480686695279E-4 |
| 862242 | 1 | 1913 | 5.227391531625719E-4 |
| 350301 | 2 | 3998 | 5.002501250625312E-4 |
+-----------------+--------------------+-------------+------------------------+--+