-- Top-N statistics with the remaining ids rolled up into a single "other" total.
-- Input: records of (lk_id, host) read with PigStorage's default delimiter.
records = LOAD '/TraceParser/blackcore/' USING PigStorage() AS (lk_id:chararray, host:chararray);

-- Number of records per lk_id.
id_groups = GROUP records BY lk_id;
id_counts = FOREACH id_groups GENERATE group AS lk_id, COUNT(records) AS amount;

-- Top-N data set: the 5 ids with the highest counts.
-- NOTE: ties at the cut-off are broken arbitrarily by ORDER/LIMIT.
sorted_counts = ORDER id_counts BY amount DESC;
top_ids = LIMIT sorted_counts 5;

-- Everything outside the Top-N is merged into "other".
-- A left outer join leaves the top_ids side null for ids that are not in the Top-N.
joined = JOIN id_counts BY lk_id LEFT OUTER, top_ids BY lk_id;

-- Keep the ids without a Top-N match. The empty-string test is preserved from the
-- original script: an empty lk_id is counted as "other" even when it ranks in the Top-N.
non_top = FILTER joined BY top_ids::lk_id IS NULL OR top_ids::lk_id == '';
other_counts = FOREACH non_top GENERATE id_counts::lk_id AS lk_id, id_counts::amount AS amount;

-- Sum the per-id counts directly instead of joining back to the raw records and
-- re-counting rows: the totals are the same and the extra join is avoided.
-- NOTE(review): when the only non-Top-N group is the null-lk_id group (amount 0),
-- this emits a row with total 0, whereas the join-based version emitted no row.
other_group = GROUP other_counts ALL;
other_total = FOREACH other_group GENERATE '其它' AS label, SUM(other_counts.amount) AS amount;
DUMP other_total;